From 2357672c54c3f748f675446f8eba8b0432b1e7e2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:49 +0530 Subject: [PATCH 001/181] bpf: Introduce BPF support for kernel module function calls This change adds support on the kernel side to allow for BPF programs to call kernel module functions. Userspace will prepare an array of module BTF fds that is passed in during BPF_PROG_LOAD using fd_array parameter. In the kernel, the module BTFs are placed in the auxilliary struct for bpf_prog, and loaded as needed. The verifier then uses insn->off to index into the fd_array. insn->off 0 is reserved for vmlinux BTF (for backwards compat), so userspace must use an fd_array index > 0 for module kfunc support. kfunc_btf_tab is sorted based on offset in an array, and each offset corresponds to one descriptor, with a max limit up to 256 such module BTFs. We also change existing kfunc_tab to distinguish each element based on imm, off pair as each such call will now be distinct. Another change is to check_kfunc_call callback, which now include a struct module * pointer, this is to be used in later patch such that the kfunc_id and module pointer are matched for dynamically registered BTF sets from loadable modules, so that same kfunc_id in two modules doesn't lead to check_kfunc_call succeeding. For the duration of the check_kfunc_call, the reference to struct module exists, as it returns the pointer stored in kfunc_btf_tab. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-2-memxor@gmail.com --- include/linux/bpf.h | 8 +- include/linux/bpf_verifier.h | 2 + kernel/bpf/core.c | 4 + kernel/bpf/verifier.c | 202 ++++++++++++++++++++++++++++++----- net/bpf/test_run.c | 2 +- net/ipv4/bpf_tcp_ca.c | 2 +- 6 files changed, 188 insertions(+), 32 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1c7fd7c4c6d35..d604c8251d889 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -513,7 +513,7 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id); + bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; struct bpf_prog_offload_ops { @@ -877,6 +877,7 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. 
arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; + struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; @@ -1639,7 +1640,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1860,7 +1861,8 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, + struct module *owner) { return false; } diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5424124dbe365..c8a78e830fcab 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -527,5 +527,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *tgt_prog, u32 btf_id, struct bpf_attach_target_info *tgt_info); +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ea8a468dbdedf..b6c72af64d5df 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -2255,6 +2256,9 @@ static void bpf_prog_free_deferred(struct work_struct *work) int i; aux = container_of(work, struct bpf_prog_aux, work); +#ifdef CONFIG_BPF_SYSCALL + bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); +#endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1433752db7403..1d6d10265cab2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1640,52 +1640,173 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } +#define MAX_KFUNC_DESCS 256 +#define MAX_KFUNC_BTFS 256 + struct bpf_kfunc_desc { struct btf_func_model func_model; u32 func_id; s32 imm; + u16 offset; +}; + +struct bpf_kfunc_btf { + struct btf *btf; + struct module *module; + u16 offset; }; -#define MAX_KFUNC_DESCS 256 struct bpf_kfunc_desc_tab { struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; -static int kfunc_desc_cmp_by_id(const void *a, const void *b) +struct bpf_kfunc_btf_tab { + struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; + u32 nr_descs; +}; + +static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; /* func_id is not greater than BTF_MAX_TYPE */ - return d0->func_id - d1->func_id; + return d0->func_id - d1->func_id ?: d0->offset - d1->offset; +} + +static int kfunc_btf_cmp_by_off(const void *a, const void *b) +{ + const struct bpf_kfunc_btf *d0 = a; + const struct bpf_kfunc_btf *d1 = b; + + return d0->offset - d1->offset; } static const struct bpf_kfunc_desc * -find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) { struct bpf_kfunc_desc desc = { .func_id = func_id, + .offset = offset, }; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; return bsearch(&desc, tab->descs, tab->nr_descs, - 
sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); +} + +static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, + s16 offset, struct module **btf_modp) +{ + struct bpf_kfunc_btf kf_btf = { .offset = offset }; + struct bpf_kfunc_btf_tab *tab; + struct bpf_kfunc_btf *b; + struct module *mod; + struct btf *btf; + int btf_fd; + + tab = env->prog->aux->kfunc_btf_tab; + b = bsearch(&kf_btf, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); + if (!b) { + if (tab->nr_descs == MAX_KFUNC_BTFS) { + verbose(env, "too many different module BTFs\n"); + return ERR_PTR(-E2BIG); + } + + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); + return ERR_PTR(-EPROTO); + } + + if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, + offset * sizeof(btf_fd), + sizeof(btf_fd))) + return ERR_PTR(-EFAULT); + + btf = btf_get_by_fd(btf_fd); + if (IS_ERR(btf)) + return btf; + + if (!btf_is_module(btf)) { + verbose(env, "BTF fd for kfunc is not a module BTF\n"); + btf_put(btf); + return ERR_PTR(-EINVAL); + } + + mod = btf_try_get_module(btf); + if (!mod) { + btf_put(btf); + return ERR_PTR(-ENXIO); + } + + b = &tab->descs[tab->nr_descs++]; + b->btf = btf; + b->module = mod; + b->offset = offset; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_btf_cmp_by_off, NULL); + } + if (btf_modp) + *btf_modp = b->module; + return b->btf; } -static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) +{ + if (!tab) + return; + + while (tab->nr_descs--) { + module_put(tab->descs[tab->nr_descs].module); + btf_put(tab->descs[tab->nr_descs].btf); + } + kfree(tab); +} + +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, + u32 func_id, s16 offset, + struct module **btf_modp) +{ + struct btf *kfunc_btf; + + if (offset) { + if (offset < 0) { + /* In the future, this can be allowed to increase limit + * of fd index into fd_array, interpreted as u16. 
+ */ + verbose(env, "negative offset disallowed for kernel module function call\n"); + return ERR_PTR(-EINVAL); + } + + kfunc_btf = __find_kfunc_desc_btf(env, offset, btf_modp); + if (IS_ERR_OR_NULL(kfunc_btf)) { + verbose(env, "cannot find module BTF for func_id %u\n", func_id); + return kfunc_btf ?: ERR_PTR(-ENOENT); + } + return kfunc_btf; + } + return btf_vmlinux ?: ERR_PTR(-ENOENT); +} + +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) { const struct btf_type *func, *func_proto; + struct bpf_kfunc_btf_tab *btf_tab; struct bpf_kfunc_desc_tab *tab; struct bpf_prog_aux *prog_aux; struct bpf_kfunc_desc *desc; const char *func_name; + struct btf *desc_btf; unsigned long addr; int err; prog_aux = env->prog->aux; tab = prog_aux->kfunc_tab; + btf_tab = prog_aux->kfunc_btf_tab; if (!tab) { if (!btf_vmlinux) { verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); @@ -1713,7 +1834,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) prog_aux->kfunc_tab = tab; } - if (find_kfunc_desc(env->prog, func_id)) + if (!btf_tab && offset) { + btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); + if (!btf_tab) + return -ENOMEM; + prog_aux->kfunc_btf_tab = btf_tab; + } + + desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + if (IS_ERR(desc_btf)) { + verbose(env, "failed to find BTF for kernel function\n"); + return PTR_ERR(desc_btf); + } + + if (find_kfunc_desc(env->prog, func_id, offset)) return 0; if (tab->nr_descs == MAX_KFUNC_DESCS) { @@ -1721,20 +1855,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) return -E2BIG; } - func = btf_type_by_id(btf_vmlinux, func_id); + func = btf_type_by_id(desc_btf, func_id); if (!func || !btf_type_is_func(func)) { verbose(env, "kernel btf_id %u is not a function\n", func_id); return -EINVAL; } - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func_proto = btf_type_by_id(desc_btf, func->type); if (!func_proto || !btf_type_is_func_proto(func_proto)) { verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", func_id); return -EINVAL; } - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_name = btf_name_by_offset(desc_btf, func->name_off); addr = kallsyms_lookup_name(func_name); if (!addr) { verbose(env, "cannot find address for kernel function %s\n", @@ -1745,12 +1879,13 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; desc->imm = BPF_CALL_IMM(addr); - err = btf_distill_func_proto(&env->log, btf_vmlinux, + desc->offset = offset; + err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, &desc->func_model); if (!err) sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id, NULL); + kfunc_desc_cmp_by_id_off, NULL); return err; } @@ -1829,7 +1964,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) } else if (bpf_pseudo_call(insn)) { ret = add_subprog(env, i + insn->imm + 1); } else { - ret = add_kfunc_call(env, insn->imm); + ret = add_kfunc_call(env, insn->imm, insn->off); } if (ret < 0) @@ -2166,12 +2301,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) { const struct btf_type *func; + struct btf *desc_btf; if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - func = btf_type_by_id(btf_vmlinux, insn->imm); - return btf_name_by_offset(btf_vmlinux, func->name_off); + 
desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + if (IS_ERR(desc_btf)) + return ""; + + func = btf_type_by_id(desc_btf, insn->imm); + return btf_name_by_offset(desc_btf, func->name_off); } /* For given verifier state backtrack_insn() is called from the last insn to @@ -6530,23 +6670,29 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; + struct module *btf_mod = NULL; const struct btf_param *args; + struct btf *desc_btf; int err; + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + if (IS_ERR(desc_btf)) + return PTR_ERR(desc_btf); + func_id = insn->imm; - func = btf_type_by_id(btf_vmlinux, func_id); - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func = btf_type_by_id(desc_btf, func_id); + func_name = btf_name_by_offset(desc_btf, func->name_off); + func_proto = btf_type_by_id(desc_btf, func->type); if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id)) { + !env->ops->check_kfunc_call(func_id, btf_mod)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); if (err) return err; @@ -6554,15 +6700,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - ptr_type_name = btf_name_by_offset(btf_vmlinux, + ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", func_name, btf_type_str(ptr_type), @@ -6570,7 +6716,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); @@ -6581,7 +6727,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) for (i = 0; i < nargs; i++) { u32 regno = i + 1; - t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); if (btf_type_is_ptr(t)) mark_btf_func_reg_size(env, regno, sizeof(void *)); else @@ -11121,7 +11267,8 @@ static int do_check(struct bpf_verifier_env *env) env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || - insn->off != 0 || + (insn->src_reg != BPF_PSEUDO_KFUNC_CALL + && insn->off != 0) || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || @@ -12477,6 +12624,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; 
func[i]->jit_requested = 1; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; + func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@ -12665,7 +12813,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, /* insn->imm has the btf func_id. Replace it with * an address (relative to __bpf_base_call). */ - desc = find_kfunc_desc(env->prog, insn->imm); + desc = find_kfunc_desc(env->prog, insn->imm, insn->off); if (!desc) { verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", insn->imm); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 072f0c16c779b..b1f6f5237de61 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -241,7 +241,7 @@ BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) BTF_SET_END(test_sk_kfunc_ids) -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) { return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id); } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 0dcee9df13268..b3afd3361f342 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -255,7 +255,7 @@ BTF_ID(func, bbr_set_state) #endif /* CONFIG_X86 */ BTF_SET_END(bpf_tcp_ca_kfunc_ids) -static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id) +static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) { return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id); } From a5d8272752416ef1a289e8d843f86b3aa4da0652 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:50 +0530 Subject: [PATCH 002/181] bpf: Be conservative while processing invalid kfunc calls This patch also modifies the BPF verifier to only return error for invalid kfunc calls specially marked by userspace (with insn->imm == 0, insn->off == 0) after the verifier has eliminated dead instructions. This can be handled in the fixup stage, and skip processing during add and check stages. If such an invalid call is dropped, the fixup stage will not encounter insn->imm as 0, otherwise it bails out and returns an error. This will be exposed as weak ksym support in libbpf in later patches. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-3-memxor@gmail.com --- kernel/bpf/verifier.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1d6d10265cab2..68d6862de82e7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1834,6 +1834,15 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_tab = tab; } + /* func_id == 0 is always invalid, but instead of returning an error, be + * conservative and wait until the code elimination pass before returning + * error, so that invalid calls that get pruned out can be in BPF programs + * loaded from userspace. It is also required that offset be untouched + * for such calls. 
+ */ + if (!func_id && !offset) + return 0; + if (!btf_tab && offset) { btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); if (!btf_tab) @@ -6675,6 +6684,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct btf *desc_btf; int err; + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -12810,6 +12823,11 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, { const struct bpf_kfunc_desc *desc; + if (!insn->imm) { + verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); + return -EINVAL; + } + /* insn->imm has the btf func_id. Replace it with * an address (relative to __bpf_base_call). */ From 14f267d95fe4b08831a022c8e15a2eb8991edbf6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:51 +0530 Subject: [PATCH 003/181] bpf: btf: Introduce helpers for dynamic BTF set registration This adds helpers for registering btf_id_set from modules and the bpf_check_mod_kfunc_call callback that can be used to look them up. With in kernel sets, the way this is supposed to work is, in kernel callback looks up within the in-kernel kfunc whitelist, and then defers to the dynamic BTF set lookup if it doesn't find the BTF id. If there is no in-kernel BTF id set, this callback can be used directly. Also fix includes for btf.h and bpfptr.h so that they can included in isolation. This is in preparation for their usage in tcp_bbr, tcp_cubic and tcp_dctcp modules in the next patch. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-4-memxor@gmail.com --- include/linux/bpfptr.h | 1 + include/linux/btf.h | 36 +++++++++++++++++++++++++++++ kernel/bpf/btf.c | 52 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+) diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 546e27fc6d462..46e1757d06a35 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -3,6 +3,7 @@ #ifndef _LINUX_BPFPTR_H #define _LINUX_BPFPTR_H +#include #include typedef sockptr_t bpfptr_t; diff --git a/include/linux/btf.h b/include/linux/btf.h index 214fde93214b9..6c4c61d821d77 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -5,6 +5,7 @@ #define _LINUX_BTF_H 1 #include +#include #include #include @@ -238,4 +239,39 @@ static inline const char *btf_name_by_offset(const struct btf *btf, } #endif +struct kfunc_btf_id_set { + struct list_head list; + struct btf_id_set *set; + struct module *owner; +}; + +struct kfunc_btf_id_list; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s); +void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s); +bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, + struct module *owner); +#else +static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ +} +static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ +} +static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, + u32 kfunc_id, struct module *owner) +{ + return false; +} +#endif + +#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ + struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ + 
THIS_MODULE } + #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c3d605b224739..62cbeb4951ebc 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6343,3 +6343,55 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { }; BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) + +/* BTF ID set registration API for modules */ + +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ + mutex_lock(&l->mutex); + list_add(&s->list, &l->list); + mutex_unlock(&l->mutex); +} +EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); + +void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ + mutex_lock(&l->mutex); + list_del_init(&s->list); + mutex_unlock(&l->mutex); +} +EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); + +bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, + struct module *owner) +{ + struct kfunc_btf_id_set *s; + + if (!owner) + return false; + mutex_lock(&klist->mutex); + list_for_each_entry(s, &klist->list, list) { + if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { + mutex_unlock(&klist->mutex); + return true; + } + } + mutex_unlock(&klist->mutex); + return false; +} + +#endif + +#define DEFINE_KFUNC_BTF_ID_LIST(name) \ + struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ + __MUTEX_INITIALIZER(name.mutex) }; \ + EXPORT_SYMBOL_GPL(name) From f614f2c755b6125c646d680d1c990b3b262bd0a9 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:52 +0530 Subject: [PATCH 004/181] tools: Allow specifying base BTF file in resolve_btfids This commit allows specifying the base BTF for resolving btf id lists/sets during link time in the resolve_btfids tool. The base BTF is set to NULL if no path is passed. This allows resolving BTF ids for module kernel objects. Also, drop the --no-fail option, as it is only used in case .BTF_ids section is not present, instead make no-fail the default mode. The long option name is same as that of pahole. 
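As a usage sketch (the module path here is only an example, not taken from this patch), a kernel module object can now be resolved against the vmlinux base BTF; the same invocation is wired into Makefile.modfinal later in this series:

    resolve_btfids -b vmlinux net/ipv4/tcp_bbr.ko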
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211002011757.311265-5-memxor@gmail.com --- tools/bpf/resolve_btfids/main.c | 28 +++++++++++++++++++--------- tools/testing/selftests/bpf/Makefile | 2 +- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index de6365b53c9ca..c6c3e613858ac 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -89,6 +89,7 @@ struct btf_id { struct object { const char *path; const char *btf; + const char *base_btf_path; struct { int fd; @@ -477,16 +478,27 @@ static int symbols_resolve(struct object *obj) int nr_structs = obj->nr_structs; int nr_unions = obj->nr_unions; int nr_funcs = obj->nr_funcs; + struct btf *base_btf = NULL; int err, type_id; struct btf *btf; __u32 nr_types; - btf = btf__parse(obj->btf ?: obj->path, NULL); + if (obj->base_btf_path) { + base_btf = btf__parse(obj->base_btf_path, NULL); + err = libbpf_get_error(base_btf); + if (err) { + pr_err("FAILED: load base BTF from %s: %s\n", + obj->base_btf_path, strerror(-err)); + return -1; + } + } + + btf = btf__parse_split(obj->btf ?: obj->path, base_btf); err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s\n", obj->btf ?: obj->path, strerror(-err)); - return -1; + goto out; } err = -1; @@ -545,6 +557,7 @@ static int symbols_resolve(struct object *obj) err = 0; out: + btf__free(base_btf); btf__free(btf); return err; } @@ -678,7 +691,6 @@ static const char * const resolve_btfids_usage[] = { int main(int argc, const char **argv) { - bool no_fail = false; struct object obj = { .efile = { .idlist_shndx = -1, @@ -695,8 +707,8 @@ int main(int argc, const char **argv) "be more verbose (show errors, etc)"), OPT_STRING(0, "btf", &obj.btf, "BTF data", "BTF data"), - OPT_BOOLEAN(0, "no-fail", &no_fail, - "do not fail if " BTF_IDS_SECTION " section is not found"), + OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", + "path of file providing base BTF"), OPT_END() }; int err = -1; @@ -717,10 +729,8 @@ int main(int argc, const char **argv) */ if (obj.efile.idlist_shndx == -1 || obj.efile.symbols_shndx == -1) { - if (no_fail) - return 0; - pr_err("FAILED to find needed sections\n"); - return -1; + pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); + return 0; } if (symbols_collect(&obj)) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index aa94739a18357..5a94d0900d1b8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -454,7 +454,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ + $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ endef From 0e32dfc80bae53b05e9eda7eaf259f30ab9ba43a Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:53 +0530 Subject: [PATCH 005/181] bpf: Enable TCP congestion control kfunc from modules This commit moves BTF ID lookup into the newly added registration helper, in a way that the bbr, cubic, and dctcp implementation set up their sets in the bpf_tcp_ca kfunc_btf_set list, while the ones not dependent on modules are looked up from the wrapper function. 
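As a condensed sketch of that pattern (taken from the tcp_bbr hunks in the diff below, with the CONFIG_X86/CONFIG_DYNAMIC_FTRACE guards and most BTF_ID entries omitted):

    BTF_SET_START(tcp_bbr_kfunc_ids)
    BTF_ID(func, bbr_init)
    /* ... remaining bbr_* entries ... */
    BTF_SET_END(tcp_bbr_kfunc_ids)

    static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);

    static int __init bbr_register(void)
    {
            int ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);

            if (ret)
                    return ret;
            /* expose the module's kfunc BTF IDs to the bpf_tcp_ca verifier callback */
            register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
            return 0;
    }

On module unload, bbr_unregister() calls unregister_kfunc_btf_id_set() before unregistering the congestion control ops.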
This lifts the restriction for them to be compiled as built in objects, and can be loaded as modules if required. Also modify Makefile.modfinal to call resolve_btfids for each module. Note that since kernel kfunc_ids never overlap with module kfunc_ids, we only match the owner for module btf id sets. See following commits for background on use of: CONFIG_X86 ifdef: 569c484f9995 (bpf: Limit static tcp-cc functions in the .BTF_ids list to x86) CONFIG_DYNAMIC_FTRACE ifdef: 7aae231ac93b (bpf: tcp: Limit calling some tcp cc functions to CONFIG_DYNAMIC_FTRACE) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-6-memxor@gmail.com --- include/linux/btf.h | 2 ++ kernel/bpf/btf.c | 2 ++ net/ipv4/bpf_tcp_ca.c | 34 +++------------------------------- net/ipv4/tcp_bbr.c | 28 +++++++++++++++++++++++++++- net/ipv4/tcp_cubic.c | 26 +++++++++++++++++++++++++- net/ipv4/tcp_dctcp.c | 26 +++++++++++++++++++++++++- scripts/Makefile.modfinal | 1 + 7 files changed, 85 insertions(+), 34 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index 6c4c61d821d77..1d56cd2bb3626 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -274,4 +274,6 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ THIS_MODULE } +extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; + #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 62cbeb4951ebc..1460dff3c1540 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6395,3 +6395,5 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ __MUTEX_INITIALIZER(name.mutex) }; \ EXPORT_SYMBOL_GPL(name) + +DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index b3afd3361f342..57709ac09fb2b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -223,41 +223,13 @@ BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE -#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC) -BTF_ID(func, cubictcp_init) -BTF_ID(func, cubictcp_recalc_ssthresh) -BTF_ID(func, cubictcp_cong_avoid) -BTF_ID(func, cubictcp_state) -BTF_ID(func, cubictcp_cwnd_event) -BTF_ID(func, cubictcp_acked) -#endif -#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP) -BTF_ID(func, dctcp_init) -BTF_ID(func, dctcp_update_alpha) -BTF_ID(func, dctcp_cwnd_event) -BTF_ID(func, dctcp_ssthresh) -BTF_ID(func, dctcp_cwnd_undo) -BTF_ID(func, dctcp_state) -#endif -#if IS_BUILTIN(CONFIG_TCP_CONG_BBR) -BTF_ID(func, bbr_init) -BTF_ID(func, bbr_main) -BTF_ID(func, bbr_sndbuf_expand) -BTF_ID(func, bbr_undo_cwnd) -BTF_ID(func, bbr_cwnd_event) -BTF_ID(func, bbr_ssthresh) -BTF_ID(func, bbr_min_tso_segs) -BTF_ID(func, bbr_set_state) -#endif -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_X86 */ BTF_SET_END(bpf_tcp_ca_kfunc_ids) static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) { - return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id); + if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) + return true; + return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); } static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6274462b86b4b..ec5550089b4d2 100644 --- 
a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -56,6 +56,8 @@ * otherwise TCP stack falls back to an internal pacing using one high * resolution timer per TCP socket and may use more resources. */ +#include +#include #include #include #include @@ -1152,14 +1154,38 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; +BTF_SET_START(tcp_bbr_kfunc_ids) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +BTF_ID(func, bbr_init) +BTF_ID(func, bbr_main) +BTF_ID(func, bbr_sndbuf_expand) +BTF_ID(func, bbr_undo_cwnd) +BTF_ID(func, bbr_cwnd_event) +BTF_ID(func, bbr_ssthresh) +BTF_ID(func, bbr_min_tso_segs) +BTF_ID(func, bbr_set_state) +#endif +#endif +BTF_SET_END(tcp_bbr_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); + static int __init bbr_register(void) { + int ret; + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); - return tcp_register_congestion_control(&tcp_bbr_cong_ops); + ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); + if (ret) + return ret; + register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); + return 0; } static void __exit bbr_unregister(void) { + unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); tcp_unregister_congestion_control(&tcp_bbr_cong_ops); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4a30deaa9a37f..5e9d9c51164c4 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -25,6 +25,8 @@ */ #include +#include +#include #include #include #include @@ -482,8 +484,25 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; +BTF_SET_START(tcp_cubic_kfunc_ids) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +BTF_ID(func, cubictcp_init) +BTF_ID(func, cubictcp_recalc_ssthresh) +BTF_ID(func, cubictcp_cong_avoid) +BTF_ID(func, cubictcp_state) +BTF_ID(func, cubictcp_cwnd_event) +BTF_ID(func, cubictcp_acked) +#endif +#endif +BTF_SET_END(tcp_cubic_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); + static int __init cubictcp_register(void) { + int ret; + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet @@ -514,11 +533,16 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - return tcp_register_congestion_control(&cubictcp); + ret = tcp_register_congestion_control(&cubictcp); + if (ret) + return ret; + register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); + return 0; } static void __exit cubictcp_unregister(void) { + unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); tcp_unregister_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 79f705450c162..0d7ab3cc7b614 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -36,6 +36,8 @@ * Glenn Judd */ +#include +#include #include #include #include @@ -236,14 +238,36 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; +BTF_SET_START(tcp_dctcp_kfunc_ids) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +BTF_ID(func, dctcp_init) +BTF_ID(func, dctcp_update_alpha) +BTF_ID(func, dctcp_cwnd_event) +BTF_ID(func, dctcp_ssthresh) +BTF_ID(func, dctcp_cwnd_undo) +BTF_ID(func, dctcp_state) +#endif +#endif +BTF_SET_END(tcp_dctcp_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, 
tcp_dctcp_kfunc_btf_set); + static int __init dctcp_register(void) { + int ret; + BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); - return tcp_register_congestion_control(&dctcp); + ret = tcp_register_congestion_control(&dctcp); + if (ret) + return ret; + register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); + return 0; } static void __exit dctcp_unregister(void) { + unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); tcp_unregister_congestion_control(&dctcp); } diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index ff805777431ce..1fb45b011e4b6 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -41,6 +41,7 @@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ if [ -f vmlinux ]; then \ LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \ + $(RESOLVE_BTFIDS) -b vmlinux $@; \ else \ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ fi; From 9dbe6015636c19f929a7f7b742f27f303ff6069d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:54 +0530 Subject: [PATCH 006/181] libbpf: Support kernel module function calls This patch adds libbpf support for kernel module function call support. The fd_array parameter is used during BPF program load to pass module BTFs referenced by the program. insn->off is set to index into this array, but starts from 1, because insn->off as 0 is reserved for btf_vmlinux. We try to use existing insn->off for a module, since the kernel limits the maximum distinct module BTFs for kfuncs to 256, and also because index must never exceed the maximum allowed value that can fit in insn->off (INT16_MAX). In the future, if kernel interprets signed offset as unsigned for kfunc calls, this limit can be increased to UINT16_MAX. Also introduce a btf__find_by_name_kind_own helper to start searching from module BTF's start id when we know that the BTF ID is not present in vmlinux BTF (in find_ksym_btf_id). 
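For orientation, a minimal sketch of the contract this sets up between libbpf and the kernel (illustrative fragment; module_btf_fd and kfunc_btf_id are placeholders, and the real plumbing lives in the diff below and in the earlier verifier patch):

    int fd_array[2];

    fd_array[0] = -1;            /* slot 0 is never used: insn->off == 0 selects vmlinux BTF */
    fd_array[1] = module_btf_fd; /* fd of the module's BTF object */

    /* the relocated kfunc call instruction */
    struct bpf_insn call = {
            .code    = BPF_JMP | BPF_CALL,
            .src_reg = BPF_PSEUDO_KFUNC_CALL,
            .imm     = kfunc_btf_id,  /* func BTF ID within that module BTF */
            .off     = 1,             /* index of the module BTF fd in fd_array */
    };

    /* at BPF_PROG_LOAD time, as in the bpf.c hunk below: attr.fd_array = ptr_to_u64(fd_array); */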
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-7-memxor@gmail.com --- tools/lib/bpf/bpf.c | 1 + tools/lib/bpf/btf.c | 18 ++++++-- tools/lib/bpf/libbpf.c | 74 +++++++++++++++++++++++---------- tools/lib/bpf/libbpf_internal.h | 3 ++ 4 files changed, 72 insertions(+), 24 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 2401fad090c52..7d1741ceaa329 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -264,6 +264,7 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); + attr.fd_array = ptr_to_u64(load_attr->fd_array); if (load_attr->name) memcpy(attr.prog_name, load_attr->name, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 6ad63e4d418a2..7774f99afa6e6 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -695,15 +695,15 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return libbpf_err(-ENOENT); } -__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, - __u32 kind) +static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, + const char *type_name, __u32 kind) { __u32 i, nr_types = btf__get_nr_types(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = start_id; i <= nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; @@ -717,6 +717,18 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return libbpf_err(-ENOENT); } +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, btf->start_id, type_name, kind); +} + +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, 1, type_name, kind); +} + static bool btf_is_modifiable(const struct btf *btf) { return (void *)btf->hdr != btf->raw_data; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e23f1b6b9402b..ea1c51dbc0f38 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -443,6 +443,11 @@ struct extern_desc { /* local btf_id of the ksym extern's type. 
*/ __u32 type_id; + /* BTF fd index to be patched in for insn->off, this is + * 0 for vmlinux BTF, index in obj->fd_array for module + * BTF + */ + __s16 btf_fd_idx; } ksym; }; }; @@ -454,6 +459,7 @@ struct module_btf { char *name; __u32 id; int fd; + int fd_array_idx; }; struct bpf_object { @@ -539,6 +545,10 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; + int *fd_array; + size_t fd_array_cap; + size_t fd_array_cnt; + char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -5407,6 +5417,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; insn[0].imm = ext->ksym.kernel_btf_id; + insn[0].off = ext->ksym.btf_fd_idx; break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -6236,6 +6247,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + load_attr.fd_array = prog->obj->fd_array; /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->preload_fn) { @@ -6752,13 +6764,14 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj) static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, __u16 kind, struct btf **res_btf, - int *res_btf_fd) + struct module_btf **res_mod_btf) { - int i, id, btf_fd, err; + struct module_btf *mod_btf; struct btf *btf; + int i, id, err; btf = obj->btf_vmlinux; - btf_fd = 0; + mod_btf = NULL; id = btf__find_by_name_kind(btf, ksym_name, kind); if (id == -ENOENT) { @@ -6767,10 +6780,10 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return err; for (i = 0; i < obj->btf_module_cnt; i++) { - btf = obj->btf_modules[i].btf; - /* we assume module BTF FD is always >0 */ - btf_fd = obj->btf_modules[i].fd; - id = btf__find_by_name_kind(btf, ksym_name, kind); + /* we assume module_btf's BTF FD is always >0 */ + mod_btf = &obj->btf_modules[i]; + btf = mod_btf->btf; + id = btf__find_by_name_kind_own(btf, ksym_name, kind); if (id != -ENOENT) break; } @@ -6779,7 +6792,7 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return -ESRCH; *res_btf = btf; - *res_btf_fd = btf_fd; + *res_mod_btf = mod_btf; return id; } @@ -6788,11 +6801,12 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, { const struct btf_type *targ_var, *targ_type; __u32 targ_type_id, local_type_id; + struct module_btf *mod_btf = NULL; const char *targ_var_name; - int id, btf_fd = 0, err; struct btf *btf = NULL; + int id, err; - id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); + id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); if (id == -ESRCH && ext->is_weak) { return 0; } else if (id < 0) { @@ -6827,7 +6841,7 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, } ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; ext->ksym.kernel_btf_id = id; pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); @@ -6839,26 +6853,20 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, struct extern_desc *ext) { int local_func_proto_id, kfunc_proto_id, kfunc_id; + struct module_btf *mod_btf = NULL; const struct btf_type *kern_func; struct btf *kern_btf = NULL; - int ret, kern_btf_fd = 0; + int ret; local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, - &kern_btf, &kern_btf_fd); + kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); if (kfunc_id < 0) { pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", ext->name); return kfunc_id; } - if (kern_btf != obj->btf_vmlinux) { - pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n", - ext->name); - return -ENOTSUP; - } - kern_func = btf__type_by_id(kern_btf, kfunc_id); kfunc_proto_id = kern_func->type; @@ -6870,9 +6878,30 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, return -EINVAL; } + /* set index for module BTF fd in fd_array, if unset */ + if (mod_btf && !mod_btf->fd_array_idx) { + /* insn->off is s16 */ + if (obj->fd_array_cnt == INT16_MAX) { + pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", + ext->name, mod_btf->fd_array_idx); + return -E2BIG; + } + /* Cannot use index 0 for module BTF fd */ + if (!obj->fd_array_cnt) + obj->fd_array_cnt = 1; + + ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), + obj->fd_array_cnt + 1); + if (ret) + return ret; + mod_btf->fd_array_idx = obj->fd_array_cnt; + /* we assume module BTF FD is always >0 */ + obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; + } + ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = kern_btf_fd; ext->ksym.kernel_btf_id = kfunc_id; + ext->ksym.btf_fd_idx = mod_btf ? 
mod_btf->fd_array_idx : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); @@ -7031,6 +7060,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = bpf_gen__finish(obj->gen_loader); } + /* clean up fd_array */ + zfree(&obj->fd_array); + /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ec79400517d42..f7fd3944d46d3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -298,6 +298,7 @@ struct bpf_prog_load_params { __u32 log_level; char *log_buf; size_t log_buf_sz; + int *fd_array; }; int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); @@ -408,6 +409,8 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind); extern enum libbpf_strict_mode libbpf_mode; From 466b2e13971ef65cd7b621ca3044be14028b002b Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:55 +0530 Subject: [PATCH 007/181] libbpf: Resolve invalid weak kfunc calls with imm = 0, off = 0 Preserve these calls as it allows verifier to succeed in loading the program if they are determined to be unreachable after dead code elimination during program load. If not, the verifier will fail at runtime. This is done for ext->is_weak symbols similar to the case for variable ksyms. 
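A minimal BPF C sketch of the resulting usage (illustrative only, not part of the patch; the kfunc name and the rodata guard are placeholders, and pruning via a frozen .rodata flag is just one way to make the call dead code):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* weak extern kfunc: if the providing module BTF is absent, libbpf now
     * leaves the call with imm = 0, off = 0 instead of failing relocation
     */
    extern void bpf_testmod_test_mod_kfunc(int i) __ksym __attribute__((weak));

    const volatile int mod_kfunc_enabled = 0; /* rodata flag, set by userspace before load */

    SEC("tc")
    int prog(struct __sk_buff *skb)
    {
            /* when the flag is 0, dead code elimination removes this call, so the
             * unresolved kfunc never reaches fixup_kfunc_call() and the load succeeds
             */
            if (mod_kfunc_enabled)
                    bpf_testmod_test_mod_kfunc(1);
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";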
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211002011757.311265-8-memxor@gmail.com --- tools/lib/bpf/libbpf.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ea1c51dbc0f38..092cf4bd18793 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3439,11 +3439,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { - if (btf_is_func(t) && ext->is_weak) { - pr_warn("extern weak function %s is unsupported\n", - ext->name); - return -ENOTSUP; - } ksym_sec = sec; ext->type = EXT_KSYM; skip_mods_and_typedefs(obj->btf, t->type, @@ -5416,8 +5411,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_EXTERN_FUNC: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; - insn[0].imm = ext->ksym.kernel_btf_id; - insn[0].off = ext->ksym.btf_fd_idx; + if (ext->is_set) { + insn[0].imm = ext->ksym.kernel_btf_id; + insn[0].off = ext->ksym.btf_fd_idx; + } else { /* unresolved weak kfunc */ + insn[0].imm = 0; + insn[0].off = 0; + } break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -6807,9 +6807,9 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, int id, err; id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); - if (id == -ESRCH && ext->is_weak) { - return 0; - } else if (id < 0) { + if (id < 0) { + if (id == -ESRCH && ext->is_weak) + return 0; pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", ext->name); return id; @@ -6862,7 +6862,9 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); if (kfunc_id < 0) { - pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", + if (kfunc_id == -ESRCH && ext->is_weak) + return 0; + pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", ext->name); return kfunc_id; } From 18f4fccbf314fdb07d276f4cd3eaf53f1825550d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:56 +0530 Subject: [PATCH 008/181] libbpf: Update gen_loader to emit BTF_KIND_FUNC relocations This change updates the BPF syscall loader to relocate BTF_KIND_FUNC relocations, with support for weak kfunc relocations. The general idea is to move map_fds to loader map, and also use the data for storing kfunc BTF fds. Since both reuse the fd_array parameter, they need to be kept together. For map_fds, we reserve MAX_USED_MAPS slots in a region, and for kfunc, we reserve MAX_KFUNC_DESCS. This is done so that insn->off has more chances of being <= INT16_MAX than treating data map as a sparse array and adding fd as needed. When the MAX_KFUNC_DESCS limit is reached, we fall back to the sparse array model, so that as long as it does remain <= INT16_MAX, we pass an index relative to the start of fd_array. We store all ksyms in an array where we try to avoid calling the bpf_btf_find_by_name_kind helper, and also reuse the BTF fd that was already stored. This also speeds up the loading process compared to emitting calls in all cases, in later tests. 
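A rough picture of the resulting fd_array layout inside the loader's data blob, following the description above and the add_map_fd()/add_kfunc_btf_fd() helpers in the diff below:

    /*
     * fd_array region in the gen_loader data blob (array of int fds):
     *
     *   [0 .. MAX_USED_MAPS - 1]                                map fds
     *   [MAX_USED_MAPS .. MAX_USED_MAPS + MAX_KFUNC_DESCS - 1]  module BTF fds for kfuncs
     *   [MAX_USED_MAPS + MAX_KFUNC_DESCS ..]                    fallback slots appended on
     *                                                           demand, usable only while the
     *                                                           index still fits in insn->off
     */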
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-9-memxor@gmail.com --- tools/lib/bpf/bpf_gen_internal.h | 16 +- tools/lib/bpf/gen_loader.c | 314 +++++++++++++++++++++++++------ tools/lib/bpf/libbpf.c | 8 +- 3 files changed, 280 insertions(+), 58 deletions(-) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 615400391e578..70eccbffefb19 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -7,6 +7,15 @@ struct ksym_relo_desc { const char *name; int kind; int insn_idx; + bool is_weak; +}; + +struct ksym_desc { + const char *name; + int ref; + int kind; + int off; + int insn; }; struct bpf_gen { @@ -24,6 +33,10 @@ struct bpf_gen { int relo_cnt; char attach_target[128]; int attach_kind; + struct ksym_desc *ksyms; + __u32 nr_ksyms; + int fd_array; + int nr_fd_array; }; void bpf_gen__init(struct bpf_gen *gen, int log_level); @@ -36,6 +49,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_a void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind, + int insn_idx); #endif diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 80087b13877fb..937bfc7db41e7 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -14,8 +14,10 @@ #include "bpf_gen_internal.h" #include "skel_internal.h" -#define MAX_USED_MAPS 64 -#define MAX_USED_PROGS 32 +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 +#define MAX_KFUNC_DESCS 256 +#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -30,7 +32,6 @@ */ struct loader_stack { __u32 btf_fd; - __u32 map_fd[MAX_USED_MAPS]; __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; }; @@ -143,13 +144,49 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size) if (realloc_data_buf(gen, size8)) return 0; prev = gen->data_cur; - memcpy(gen->data_cur, data, size); - gen->data_cur += size; - memcpy(gen->data_cur, &zero, size8 - size); - gen->data_cur += size8 - size; + if (data) { + memcpy(gen->data_cur, data, size); + memcpy(gen->data_cur + size, &zero, size8 - size); + } else { + memset(gen->data_cur, 0, size8); + } + gen->data_cur += size8; return prev - gen->data_start; } +/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative + * to start of fd_array. Caller can decide if it is usable or not. 
+ */ +static int add_map_fd(struct bpf_gen *gen) +{ + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_maps == MAX_USED_MAPS) { + pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); + gen->error = -E2BIG; + return 0; + } + return gen->nr_maps++; +} + +static int add_kfunc_btf_fd(struct bpf_gen *gen) +{ + int cur; + + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_fd_array == MAX_KFUNC_DESCS) { + cur = add_data(gen, NULL, sizeof(int)); + return (cur - gen->fd_array) / sizeof(int); + } + return MAX_USED_MAPS + gen->nr_fd_array++; +} + +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -171,14 +208,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data) emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); } -/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ -static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) +static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off) { - emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); - emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off)); - emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); } static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, @@ -326,11 +371,11 @@ int bpf_gen__finish(struct bpf_gen *gen) offsetof(struct bpf_prog_desc, prog_fd), 4, stack_off(prog_fd[i])); for (i = 0; i < gen->nr_maps; i++) - move_stack2ctx(gen, - sizeof(struct bpf_loader_ctx) + - sizeof(struct bpf_map_desc) * i + - offsetof(struct bpf_map_desc, map_fd), 4, - stack_off(map_fd[i])); + move_blob2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); pr_debug("gen: finish %d\n", gen->error); @@ -390,7 +435,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, { int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; - int map_create_attr; + int map_create_attr, idx; union bpf_attr attr; memset(&attr, 0, attr_size); @@ -467,9 +512,11 @@ void bpf_gen__map_create(struct bpf_gen *gen, gen->error = -EDOM; /* internal bug */ return; } else { - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, - stack_off(map_fd[map_idx]))); - gen->nr_maps++; + /* add_map_fd does gen->nr_maps++ */ + idx = add_map_fd(gen); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, idx))); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0)); } if (close_inner_map_fd) emit_sys_close_stack(gen, 
stack_off(inner_map_fd)); @@ -511,8 +558,8 @@ static void emit_find_attach_target(struct bpf_gen *gen) */ } -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, - int insn_idx) +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -524,38 +571,192 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, gen->relos = relo; relo += gen->relo_cnt; relo->name = name; + relo->is_weak = is_weak; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; } -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +/* returns existing ksym_desc with ref incremented, or inserts a new one */ +static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) { - int name, insn, len = strlen(relo->name) + 1; + struct ksym_desc *kdesc; - pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); - name = add_data(gen, relo->name, len); + for (int i = 0; i < gen->nr_ksyms; i++) { + if (!strcmp(gen->ksyms[i].name, relo->name)) { + gen->ksyms[i].ref++; + return &gen->ksyms[i]; + } + } + kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); + if (!kdesc) { + gen->error = -ENOMEM; + return NULL; + } + gen->ksyms = kdesc; + kdesc = &gen->ksyms[gen->nr_ksyms++]; + kdesc->name = relo->name; + kdesc->kind = relo->kind; + kdesc->ref = 1; + kdesc->off = 0; + kdesc->insn = 0; + return kdesc; +} + +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + */ +static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1; + name_off = add_data(gen, relo->name, len); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, name)); + 0, 0, 0, name_off)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); - emit_check_err(gen); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + * + * We need to reuse BTF fd for same symbol otherwise each relocation takes a new + * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, + * this would mean a new BTF fd index for each entry. By pairing symbol name + * with index, we get the insn->imm, insn->off pairing that kernel uses for + * kfunc_tab, which becomes the effective limit even though all of them may + * share same index in fd_array (such that kfunc_btf_tab has 1 element). 
+ */ +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + int btf_fd_idx; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing bpf_insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2, + kdesc->insn + offsetof(struct bpf_insn, off)); + goto log; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + if (!relo->is_weak) + emit_check_err(gen); + /* get index in fd_array to store BTF FD at */ + btf_fd_idx = add_kfunc_btf_fd(gen); + if (btf_fd_idx > INT16_MAX) { + pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n", + btf_fd_idx, relo->name); + gen->error = -E2BIG; + return; + } + kdesc->off = btf_fd_idx; + /* set a default value for imm */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + /* skip success case store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); /* store btf_id into insn[insn_idx].imm */ - insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + - offsetof(struct bpf_insn, imm); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, insn)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); - if (relo->kind == BTF_KIND_VAR) { - /* store btf_obj_fd into insn[insn_idx + 1].imm */ - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, - sizeof(struct bpf_insn))); + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); + /* skip store of BTF fd if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); + /* store BTF fd in slot */ + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* set a default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip insn->off store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); + /* skip if vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); + /* store index into insn[insn_idx].off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); +log: + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, + offsetof(struct bpf_insn, off))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d", + relo->name, kdesc->ref); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, kdesc->off))); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0)); + debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd", + relo->name, kdesc->ref); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + 
kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + goto log; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + emit_check_err(gen); + /* store btf_id into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); +log: + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + + offsetof(struct bpf_insn, imm))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " var (%s:count=%d): imm: %%d, fd: %%d", + relo->name, kdesc->ref); +} + +static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +{ + int insn; + + pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); + switch (relo->kind) { + case BTF_KIND_VAR: + emit_relo_ksym_btf(gen, relo, insn); + break; + case BTF_KIND_FUNC: + emit_relo_kfunc_btf(gen, relo, insn); + break; + default: + pr_warn("Unknown relocation kind '%d'\n", relo->kind); + gen->error = -EDOM; + return; } } @@ -571,14 +772,22 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; - for (i = 0; i < gen->relo_cnt; i++) { - if (gen->relos[i].kind != BTF_KIND_VAR) - continue; - /* close fd recorded in insn[insn_idx + 1].imm */ - insn = insns + - sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + - offsetof(struct bpf_insn, imm); - emit_sys_close_blob(gen, insn); + for (i = 0; i < gen->nr_ksyms; i++) { + if (gen->ksyms[i].kind == BTF_KIND_VAR) { + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = gen->ksyms[i].insn; + insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); + } else { /* BTF_KIND_FUNC */ + emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); + if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) + gen->nr_fd_array--; + } + } + if (gen->nr_ksyms) { + free(gen->ksyms); + gen->nr_ksyms = 0; + gen->ksyms = NULL; } if (gen->relo_cnt) { free(gen->relos); @@ -637,9 +846,8 @@ void bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); - /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ - emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array), - stack_off(map_fd[0])); + /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ + emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, @@ -706,8 +914,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); map_update_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_update_attr, 
map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); emit_rel_store(gen, attr_field(map_update_attr, value), value); /* emit MAP_UPDATE_ELEM command */ @@ -725,8 +933,8 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) memset(&attr, 0, attr_size); pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); debug_ret(gen, "map_freeze"); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 092cf4bd18793..f32fa51b1e63e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6360,12 +6360,12 @@ static int bpf_program__record_externs(struct bpf_program *prog) ext->name); return -ENOTSUP; } - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, - relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, + BTF_KIND_VAR, relo->insn_idx); break; case RELO_EXTERN_FUNC: - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, - relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, + BTF_KIND_FUNC, relo->insn_idx); break; default: continue; From c48e51c8b07aba8a18125221cb67a40cb1256bf2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:57 +0530 Subject: [PATCH 009/181] bpf: selftests: Add selftests for module kfunc support This adds selftests that tests the success and failure path for modules kfuncs (in presence of invalid kfunc calls) for both libbpf and gen_loader. It also adds a prog_test kfunc_btf_id_list so that we can add module BTF ID set from bpf_testmod. This also introduces a couple of test cases to verifier selftests for validating whether we get an error or not depending on if invalid kfunc call remains after elimination of unreachable instructions. 
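As a quick reference for how a module plugs into such a list, the registration flow condenses to the sketch below. The kfunc and set names here are illustrative only; bpf_testmod in this patch follows the same pattern against the newly exported prog_test_kfunc_list.

    #include <linux/module.h>
    #include <linux/btf.h>
    #include <linux/btf_ids.h>

    /* kfunc exposed to BPF programs; callable via an extern __ksym declaration */
    noinline void my_mod_kfunc(int i)
    {
            (void)i; /* placeholder body */
    }

    /* BTF ID set describing the module's callable kfuncs */
    BTF_SET_START(my_mod_kfunc_ids)
    BTF_ID(func, my_mod_kfunc)
    BTF_SET_END(my_mod_kfunc_ids)

    static DEFINE_KFUNC_BTF_ID_SET(&my_mod_kfunc_ids, my_mod_kfunc_btf_set);

    static int __init my_mod_init(void)
    {
            /* make the set visible to check_kfunc_call() for this list */
            register_kfunc_btf_id_set(&prog_test_kfunc_list, &my_mod_kfunc_btf_set);
            return 0;
    }

    static void __exit my_mod_exit(void)
    {
            unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &my_mod_kfunc_btf_set);
    }

    module_init(my_mod_init);
    module_exit(my_mod_exit);
    MODULE_LICENSE("GPL");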
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-10-memxor@gmail.com --- include/linux/btf.h | 1 + kernel/bpf/btf.c | 1 + net/bpf/test_run.c | 5 +- tools/testing/selftests/bpf/Makefile | 7 +-- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 23 +++++++++- .../selftests/bpf/prog_tests/ksyms_module.c | 29 ++++++------ .../bpf/prog_tests/ksyms_module_libbpf.c | 28 +++++++++++ .../selftests/bpf/progs/test_ksyms_module.c | 46 ++++++++++++++----- tools/testing/selftests/bpf/verifier/calls.c | 23 ++++++++++ 9 files changed, 132 insertions(+), 31 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c diff --git a/include/linux/btf.h b/include/linux/btf.h index 1d56cd2bb3626..203eef993d763 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -275,5 +275,6 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, THIS_MODULE } extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; +extern struct kfunc_btf_id_list prog_test_kfunc_list; #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 1460dff3c1540..2ebffb9f57ebb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6397,3 +6397,4 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, EXPORT_SYMBOL_GPL(name) DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); +DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b1f6f5237de61..529608784aa89 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -2,6 +2,7 @@ /* Copyright (c) 2017 Facebook */ #include +#include #include #include #include @@ -243,7 +244,9 @@ BTF_SET_END(test_sk_kfunc_ids) bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) { - return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id); + if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) + return true; + return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); } static void *bpf_test_init(const union bpf_attr *kattr, u32 size, diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 5a94d0900d1b8..c5c9a9f50d8d0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -315,8 +315,9 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ - test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c \ - trace_vprintk.c + test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c +# Generate both light skeleton and libbpf skeleton for these +LSKELS_EXTRA := test_ksyms_module.c SKEL_BLACKLIST += $$(LSKELS) test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o @@ -346,7 +347,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) -TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS)) +TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 50fc5561110a4..b892948dc134c 100644 
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#include +#include #include #include #include @@ -13,6 +15,12 @@ DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; +noinline void +bpf_testmod_test_mod_kfunc(int i) +{ + *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; +} + noinline int bpf_testmod_loop_test(int n) { int i, sum = 0; @@ -71,13 +79,26 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +BTF_SET_START(bpf_testmod_kfunc_ids) +BTF_ID(func, bpf_testmod_test_mod_kfunc) +BTF_SET_END(bpf_testmod_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); + static int bpf_testmod_init(void) { - return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + int ret; + + ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + if (ret) + return ret; + register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); + return 0; } static void bpf_testmod_exit(void) { + unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 2cd5cded543fa..831447878d7b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -2,30 +2,29 @@ /* Copyright (c) 2021 Facebook */ #include -#include -#include +#include #include "test_ksyms_module.lskel.h" -static int duration; - void test_ksyms_module(void) { - struct test_ksyms_module* skel; + struct test_ksyms_module *skel; + int retval; int err; - skel = test_ksyms_module__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!env.has_testmod) { + test__skip(); return; + } - err = test_ksyms_module__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + skel = test_ksyms_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open_and_load")) + return; + err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; - - usleep(1); - - ASSERT_EQ(skel->bss->triggered, true, "triggered"); - ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val"); - + ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c new file mode 100644 index 0000000000000..e6343ef63af96 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "test_ksyms_module.skel.h" + +void test_ksyms_module_libbpf(void) +{ + struct test_ksyms_module *skel; + int retval, err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + skel = test_ksyms_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) + return; + err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, + sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, 
"bpf_prog_test_run")) + goto cleanup; + ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); +cleanup: + test_ksyms_module__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c index d6a0b3086b906..0650d918c096f 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c @@ -2,24 +2,48 @@ /* Copyright (c) 2021 Facebook */ #include "vmlinux.h" - #include +#define X_0(x) +#define X_1(x) x X_0(x) +#define X_2(x) x X_1(x) +#define X_3(x) x X_2(x) +#define X_4(x) x X_3(x) +#define X_5(x) x X_4(x) +#define X_6(x) x X_5(x) +#define X_7(x) x X_6(x) +#define X_8(x) x X_7(x) +#define X_9(x) x X_8(x) +#define X_10(x) x X_9(x) +#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y) + extern const int bpf_testmod_ksym_percpu __ksym; +extern void bpf_testmod_test_mod_kfunc(int i) __ksym; +extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak; -int out_mod_ksym_global = 0; -bool triggered = false; +int out_bpf_testmod_ksym = 0; +const volatile int x = 0; -SEC("raw_tp/sys_enter") -int handler(const void *ctx) +SEC("tc") +int load(struct __sk_buff *skb) { - int *val; - __u32 cpu; - - val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); - out_mod_ksym_global = *val; - triggered = true; + /* This will be kept by clang, but removed by verifier. Since it is + * marked as __weak, libbpf and gen_loader don't error out if BTF ID + * is not found for it, instead imm and off is set to 0 for it. + */ + if (x) + bpf_testmod_invalid_mod_kfunc(); + bpf_testmod_test_mod_kfunc(42); + out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); + return 0; +} +SEC("tc") +int load_256(struct __sk_buff *skb) +{ + /* this will fail if kfunc doesn't reuse its own btf fd index */ + REPEAT_256(bpf_testmod_test_mod_kfunc(42);); + bpf_testmod_test_mod_kfunc(42); return 0; } diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 336a749673d19..d7b74eb283339 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1,3 +1,26 @@ +{ + "calls: invalid kfunc call not eliminated", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = REJECT, + .errstr = "invalid kernel function call not eliminated in verifier pass", +}, +{ + "calls: invalid kfunc call unreachable", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, +}, { "calls: basic sanity", .insns = { From 0640c77c46cb84328d5e08aa85a781a60be8b02b Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Tue, 5 Oct 2021 17:18:38 -0700 Subject: [PATCH 010/181] bpf: Avoid retpoline for bpf_for_each_map_elem Similarly to 09772d92cd5a ("bpf: avoid retpoline for lookup/update/delete calls on maps") and 84430d4232c3 ("bpf, verifier: avoid retpoline for map push/pop/peek operation") avoid indirect call while calling bpf_for_each_map_elem. 
Before (a program fragment): ; if (rules_map) { 142: (15) if r4 == 0x0 goto pc+8 143: (bf) r3 = r10 ; bpf_for_each_map_elem(rules_map, process_each_rule, &ctx, 0); 144: (07) r3 += -24 145: (bf) r1 = r4 146: (18) r2 = subprog[+5] 148: (b7) r4 = 0 149: (85) call bpf_for_each_map_elem#143680 <-- indirect call via helper After (same program fragment): ; if (rules_map) { 142: (15) if r4 == 0x0 goto pc+8 143: (bf) r3 = r10 ; bpf_for_each_map_elem(rules_map, process_each_rule, &ctx, 0); 144: (07) r3 += -24 145: (bf) r1 = r4 146: (18) r2 = subprog[+5] 148: (b7) r4 = 0 149: (85) call bpf_for_each_array_elem#170336 <-- direct call On a benchmark that calls bpf_for_each_map_elem() once and does many other things (mostly checking fields in skb) with CONFIG_RETPOLINE=y it makes program faster. Before: ============================================================================ Benchmark.cpp time/iter iters/s ============================================================================ IngressMatchByRemoteEndpoint 80.78ns 12.38M IngressMatchByRemoteIP 80.66ns 12.40M IngressMatchByRemotePort 80.87ns 12.37M After: ============================================================================ Benchmark.cpp time/iter iters/s ============================================================================ IngressMatchByRemoteEndpoint 73.49ns 13.61M IngressMatchByRemoteIP 71.48ns 13.99M IngressMatchByRemotePort 70.39ns 14.21M Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211006001838.75607-1-rdna@fb.com --- kernel/bpf/verifier.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 68d6862de82e7..20900a1bac12f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13112,7 +13112,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || - insn->imm == BPF_FUNC_redirect_map)) { + insn->imm == BPF_FUNC_redirect_map || + insn->imm == BPF_FUNC_for_each_map_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13156,6 +13157,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, + (int (*)(struct bpf_map *map, + bpf_callback_t callback_fn, + void *callback_ctx, + u64 flags))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13180,6 +13186,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env) case BPF_FUNC_redirect_map: insn->imm = BPF_CALL_IMM(ops->map_redirect); continue; + case BPF_FUNC_for_each_map_elem: + insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); + continue; } goto patch_call_imm; From 57a610f1c58fa493315e1c24eef6d992cdf4c4a9 Mon Sep 17 00:00:00 2001 From: Jie Meng Date: Fri, 1 Oct 2021 20:56:26 -0700 Subject: [PATCH 011/181] bpf, x64: Save bytes for DIV by reducing reg copies Instead of unconditionally performing push/pop on %rax/%rdx in case of division/modulo, we can save a few bytes in case of destination register being either BPF r0 (%rax) or r3 (%rdx) since the result is written in there anyway. Also, we do not need to copy the source to %r11 unless the source is either %rax, %rdx or an immediate. 
For example, before the patch: 22: push %rax 23: push %rdx 24: mov %rsi,%r11 27: xor %edx,%edx 29: div %r11 2c: mov %rax,%r11 2f: pop %rdx 30: pop %rax 31: mov %r11,%rax After: 22: push %rdx 23: xor %edx,%edx 25: div %rsi 28: pop %rdx Signed-off-by: Jie Meng Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211002035626.2041910-1-jmeng@fb.com --- arch/x86/net/bpf_jit_comp.c | 71 +++++++++++++--------- tools/testing/selftests/bpf/verifier/jit.c | 47 ++++++++++++++ 2 files changed, 89 insertions(+), 29 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 576ef1a6954a6..5a0edea3cc2e4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: - case BPF_ALU64 | BPF_DIV | BPF_K: - EMIT1(0x50); /* push rax */ - EMIT1(0x52); /* push rdx */ - - if (BPF_SRC(insn->code) == BPF_X) - /* mov r11, src_reg */ - EMIT_mov(AUX_REG, src_reg); - else + case BPF_ALU64 | BPF_DIV | BPF_K: { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + + if (dst_reg != BPF_REG_0) + EMIT1(0x50); /* push rax */ + if (dst_reg != BPF_REG_3) + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) { + if (src_reg == BPF_REG_0 || + src_reg == BPF_REG_3) { + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); + src_reg = AUX_REG; + } + } else { /* mov r11, imm32 */ EMIT3_off32(0x49, 0xC7, 0xC3, imm32); + src_reg = AUX_REG; + } - /* mov rax, dst_reg */ - EMIT_mov(BPF_REG_0, dst_reg); + if (dst_reg != BPF_REG_0) + /* mov rax, dst_reg */ + emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); /* * xor edx, edx @@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ EMIT2(0x31, 0xd2); - if (BPF_CLASS(insn->code) == BPF_ALU64) - /* div r11 */ - EMIT3(0x49, 0xF7, 0xF3); - else - /* div r11d */ - EMIT3(0x41, 0xF7, 0xF3); - - if (BPF_OP(insn->code) == BPF_MOD) - /* mov r11, rdx */ - EMIT3(0x49, 0x89, 0xD3); - else - /* mov r11, rax */ - EMIT3(0x49, 0x89, 0xC3); - - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ - - /* mov dst_reg, r11 */ - EMIT_mov(dst_reg, AUX_REG); + if (is64) + EMIT1(add_1mod(0x48, src_reg)); + else if (is_ereg(src_reg)) + EMIT1(add_1mod(0x40, src_reg)); + /* div src_reg */ + EMIT2(0xF7, add_1reg(0xF0, src_reg)); + + if (BPF_OP(insn->code) == BPF_MOD && + dst_reg != BPF_REG_3) + /* mov dst_reg, rdx */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3); + else if (BPF_OP(insn->code) == BPF_DIV && + dst_reg != BPF_REG_0) + /* mov dst_reg, rax */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0); + + if (dst_reg != BPF_REG_3) + EMIT1(0x5A); /* pop rdx */ + if (dst_reg != BPF_REG_0) + EMIT1(0x58); /* pop rax */ break; + } case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index eedcb752bf707..79021c30e51eb 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -102,6 +102,53 @@ .result = ACCEPT, .retval = 2, }, +{ + "jit: various div tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL), + BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, 
BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0xbeefULL), + BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0x2bULL), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, { "jit: jsgt, jslt", .insns = { From 7ca61121598338ab713a5c705a843f3b8fed9f90 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Oct 2021 22:11:05 -0700 Subject: [PATCH 012/181] libbpf: Add API that copies all BTF types from one BTF object to another Add a bulk copying api, btf__add_btf(), that speeds up and simplifies appending entire contents of one BTF object to another one, taking care of copying BTF type data, adjusting resulting BTF type IDs according to their new locations in the destination BTF object, as well as copying and deduplicating all the referenced strings and updating all the string offsets in new BTF types as appropriate. This API is intended to be used from tools that are generating and otherwise manipulating BTFs generically, such as pahole. In pahole's case, this API is useful for speeding up parallelized BTF encoding, as it allows pahole to offload all the intricacies of BTF type copying to libbpf and handle the parallelization aspects of the process. 
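A minimal usage sketch, outside of this patch (the path argument and error handling are placeholders, not part of the libbpf change itself):

    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    /* Append every type from the BTF object at src_path to dst; returns the
     * ID of the first appended type, or a negative error code.
     */
    static int append_btf_from_file(struct btf *dst, const char *src_path)
    {
            struct btf *src;
            int first_id;
            long err;

            src = btf__parse(src_path, NULL);
            err = libbpf_get_error(src);
            if (err)
                    return err;

            /* bulk copy: type data, referenced type IDs and string offsets
             * are all remapped by libbpf; on failure dst is left unmodified
             */
            first_id = btf__add_btf(dst, src);
            btf__free(src);
            return first_id;
    }

This is roughly the shape of the loop a parallelized pahole encoder would run over its per-thread BTF objects before emitting the final merged BTF.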
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/bpf/20211006051107.17921-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 114 ++++++++++++++++++++++++++++++++++++++- tools/lib/bpf/btf.h | 22 ++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7774f99afa6e6..60fbd1c6d466d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -189,12 +189,17 @@ int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_ return 0; } +static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt) +{ + return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), + btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); +} + static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { __u32 *p; - p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types, BTF_MAX_NR_TYPES, 1); + p = btf_add_type_offs_mem(btf, 1); if (!p) return -ENOMEM; @@ -1703,6 +1708,111 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return btf_commit_type(btf, sz); } +static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +{ + struct btf *btf = ctx; + + if (!*type_id) /* nothing to do for VOID references */ + return 0; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + return 0; +} + +int btf__add_btf(struct btf *btf, const struct btf *src_btf) +{ + struct btf_pipe p = { .src = src_btf, .dst = btf }; + int data_sz, sz, cnt, i, err, old_strs_len; + __u32 *off; + void *t; + + /* appending split BTF isn't supported yet */ + if (src_btf->base_btf) + return libbpf_err(-ENOTSUP); + + /* deconstruct BTF, if necessary, and invalidate raw_data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + /* remember original strings section size if we have to roll back + * partial strings section changes + */ + old_strs_len = btf->hdr->str_len; + + data_sz = src_btf->hdr->type_len; + cnt = btf__get_nr_types(src_btf); + + /* pre-allocate enough memory for new types */ + t = btf_add_type_mem(btf, data_sz); + if (!t) + return libbpf_err(-ENOMEM); + + /* pre-allocate enough memory for type offset index for new types */ + off = btf_add_type_offs_mem(btf, cnt); + if (!off) + return libbpf_err(-ENOMEM); + + /* bulk copy types data for all types from src_btf */ + memcpy(t, src_btf->types_data, data_sz); + + for (i = 0; i < cnt; i++) { + sz = btf_type_size(t); + if (sz < 0) { + /* unlikely, has to be corrupted src_btf */ + err = sz; + goto err_out; + } + + /* fill out type ID to type offset mapping for lookups by type ID */ + *off = t - btf->types_data; + + /* add, dedup, and remap strings referenced by this BTF type */ + err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + if (err) + goto err_out; + + /* remap all type IDs referenced from this BTF type */ + err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + if (err) + goto err_out; + + /* go to next type data and type offset index entry */ + t += sz; + off++; + } + + /* Up until now any of the copied type data was effectively invisible, + * so if we exited early before this point due to error, BTF would be + * effectively unmodified. 
There would be extra internal memory + * pre-allocated, but it would not be available for querying. But now + * that we've copied and rewritten all the data successfully, we can + * update type count and various internal offsets and sizes to + * "commit" the changes and made them visible to the outside world. + */ + btf->hdr->type_len += data_sz; + btf->hdr->str_off += data_sz; + btf->nr_types += cnt; + + /* return type ID of the first added BTF type */ + return btf->start_id + btf->nr_types - cnt; +err_out: + /* zero out preallocated memory as if it was just allocated with + * libbpf_add_mem() + */ + memset(btf->types_data + btf->hdr->type_len, 0, data_sz); + memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); + + /* and now restore original strings section size; types data size + * wasn't modified, so doesn't need restoring, see big comment above */ + btf->hdr->str_len = old_strs_len; + + return libbpf_err(err); +} + /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 2cfe313279204..864eb51753a1a 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -173,6 +173,28 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type); +/** + * @brief **btf__add_btf()** appends all the BTF types from *src_btf* into *btf* + * @param btf BTF object which all the BTF types and strings are added to + * @param src_btf BTF object which all BTF types and referenced strings are copied from + * @return BTF type ID of the first appended BTF type, or negative error code + * + * **btf__add_btf()** can be used to simply and efficiently append the entire + * contents of one BTF object to another one. All the BTF type data is copied + * over, all referenced type IDs are adjusted by adding a necessary ID offset. + * Only strings referenced from BTF types are copied over and deduplicated, so + * if there were some unused strings in *src_btf*, those won't be copied over, + * which is consistent with the general string deduplication semantics of BTF + * writing APIs. + * + * If any error is encountered during this process, the contents of *btf* is + * left intact, which means that **btf__add_btf()** follows the transactional + * semantics and the operation as a whole is all-or-nothing. + * + * *src_btf* has to be non-split BTF, as of now copying types from split BTF + * is not supported and will result in -ENOTSUP error code returned. + */ +LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf); LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 9e649cf9e7715..f6b0db1e8c8b4 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -389,5 +389,6 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: + btf__add_btf; btf__add_tag; } LIBBPF_0.5.0; From c65eb8082d4cb02ef87bdecedce8969d5ccbea54 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Oct 2021 22:11:06 -0700 Subject: [PATCH 013/181] selftests/bpf: Refactor btf_write selftest to reuse BTF generation logic Next patch will need to reuse BTF generation logic, which tests every supported BTF kind, for testing btf__add_btf() APIs. 
So restructure existing selftests and make it as a single subtest that uses bulk VALIDATE_RAW_BTF() macro for raw BTF dump checking. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211006051107.17921-3-andrii@kernel.org --- .../selftests/bpf/prog_tests/btf_write.c | 55 +++++++++++++++++-- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 76548eecce2c2..aa45056182525 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -4,19 +4,15 @@ #include #include "btf_helpers.h" -void test_btf_write() { +static void gen_btf(struct btf *btf) +{ const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; const struct btf_enum *v; const struct btf_param *p; - struct btf *btf; int id, err, str_off; - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "new_empty")) - return; - str_off = btf__find_str(btf, "int"); ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); @@ -301,6 +297,53 @@ void test_btf_write() { ASSERT_EQ(btf_tag(t)->component_idx, 1, "tag_component_idx"); ASSERT_STREQ(btf_type_raw_dump(btf, 19), "[19] TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); +} + +static void test_btf_add() +{ + struct btf *btf; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + return; + + gen_btf(btf); + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] CONST '(anon)' type_id=5", + "[4] VOLATILE '(anon)' type_id=3", + "[5] RESTRICT '(anon)' type_id=4", + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[10] FWD 'struct_fwd' fwd_kind=struct", + "[11] FWD 'union_fwd' fwd_kind=union", + "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[13] TYPEDEF 'typedef1' type_id=1", + "[14] FUNC 'func1' type_id=15 linkage=global", + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", + "[16] VAR 'var1' type_id=1, linkage=global-alloc", + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", + "[18] TAG 'tag1' type_id=16 component_idx=-1", + "[19] TAG 'tag2' type_id=14 component_idx=1"); btf__free(btf); } + +void test_btf_write() +{ + if (test__start_subtest("btf_add")) + test_btf_add(); +} From 9d05787223913171fce20a737ba54e3b6e7da13c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Oct 2021 22:11:07 -0700 Subject: [PATCH 014/181] selftests/bpf: Test new btf__add_btf() API Add a test that validates that btf__add_btf() API is correctly copying all the types from the source BTF into destination BTF object and adjusts type IDs and string offsets properly. 
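As a concrete instance of the ID remapping being validated: gen_btf() emits 19 types, so btf__add_btf() is expected to return 20 for the first appended type, and a source reference such as the CONST pointing at type 5 is rewritten to type_id=24 (5 + 19) in the merged object, which is exactly what the expected raw dump below spells out.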
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211006051107.17921-4-andrii@kernel.org --- .../selftests/bpf/prog_tests/btf_write.c | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index aa45056182525..886e0fc1efb12 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -342,8 +342,94 @@ static void test_btf_add() btf__free(btf); } +static void test_btf_add_btf() +{ + struct btf *btf1 = NULL, *btf2 = NULL; + int id; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "btf1")) + return; + + btf2 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf2, "btf2")) + goto cleanup; + + gen_btf(btf1); + gen_btf(btf2); + + id = btf__add_btf(btf1, btf2); + if (!ASSERT_EQ(id, 20, "id")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] CONST '(anon)' type_id=5", + "[4] VOLATILE '(anon)' type_id=3", + "[5] RESTRICT '(anon)' type_id=4", + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[10] FWD 'struct_fwd' fwd_kind=struct", + "[11] FWD 'union_fwd' fwd_kind=union", + "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[13] TYPEDEF 'typedef1' type_id=1", + "[14] FUNC 'func1' type_id=15 linkage=global", + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", + "[16] VAR 'var1' type_id=1, linkage=global-alloc", + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", + "[18] TAG 'tag1' type_id=16 component_idx=-1", + "[19] TAG 'tag2' type_id=14 component_idx=1", + + /* types appended from the second BTF */ + "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[21] PTR '(anon)' type_id=20", + "[22] CONST '(anon)' type_id=24", + "[23] VOLATILE '(anon)' type_id=22", + "[24] RESTRICT '(anon)' type_id=23", + "[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10", + "[26] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=20 bits_offset=0\n" + "\t'f2' type_id=20 bits_offset=32 bitfield_size=16", + "[27] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=20 bits_offset=0 bitfield_size=16", + "[28] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[29] FWD 'struct_fwd' fwd_kind=struct", + "[30] FWD 'union_fwd' fwd_kind=union", + "[31] ENUM 'enum_fwd' size=4 vlen=0", + "[32] TYPEDEF 'typedef1' type_id=20", + "[33] FUNC 'func1' type_id=34 linkage=global", + "[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n" + "\t'p1' type_id=20\n" + "\t'p2' type_id=21", + "[35] VAR 'var1' type_id=20, linkage=global-alloc", + "[36] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=20 offset=4 size=8", + "[37] TAG 'tag1' type_id=35 component_idx=-1", + "[38] TAG 'tag2' type_id=33 component_idx=1"); + +cleanup: + btf__free(btf1); + btf__free(btf2); +} + void test_btf_write() { if (test__start_subtest("btf_add")) test_btf_add(); + if (test__start_subtest("btf_add_btf")) + test_btf_add_btf(); } From e737547eab6af8b57d77f7ba323c8d2d6b1a0008 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Tue, 5 Oct 2021 18:54:02 +0200 
Subject: [PATCH 015/181] mips, uasm: Enable muhu opcode for MIPS R6 Enable the 'muhu' instruction, complementing the existing 'mulu', needed to implement a MIPS32 BPF JIT. Also fix a typo in the existing definition of 'dmulu'. Signed-off-by: Tony Ambardar Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211005165408.2305108-2-johan.almbladh@anyfinetworks.com --- arch/mips/include/asm/uasm.h | 1 + arch/mips/mm/uasm-mips.c | 4 +++- arch/mips/mm/uasm.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index f7effca791a50..5efa4e2dc9abb 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -145,6 +145,7 @@ Ip_u1(_mtlo); Ip_u3u1u2(_mul); Ip_u1u2(_multu); Ip_u3u1u2(_mulu); +Ip_u3u1u2(_muhu); Ip_u3u1u2(_nor); Ip_u3u1u2(_or); Ip_u2u1u3(_ori); diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 7154a1d99aad6..e15c6700cd088 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -90,7 +90,7 @@ static const struct insn insn_table[insn_invalid] = { RS | RT | RD}, [insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, [insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT}, - [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op), + [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op), RS | RT | RD}, [insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE}, [insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE}, @@ -150,6 +150,8 @@ static const struct insn insn_table[insn_invalid] = { [insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS}, [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op), RS | RT | RD}, + [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op), + RS | RT | RD}, #ifndef CONFIG_CPU_MIPSR6 [insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, #else diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 81dd226d6b6bf..125140979d62c 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -59,7 +59,7 @@ enum opcode { insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld, insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0, - insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor, + insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor, insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc, insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll, insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, @@ -344,6 +344,7 @@ I_u1(_mtlo) I_u3u1u2(_mul) I_u1u2(_multu) I_u3u1u2(_mulu) +I_u3u1u2(_muhu) I_u3u1u2(_nor) I_u3u1u2(_or) I_u2u1u3(_ori) From f7c036c15b5388749bc8de208fb7b19d69c4f6fa Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 5 Oct 2021 18:54:03 +0200 Subject: [PATCH 016/181] mips, uasm: Add workaround for Loongson-2F nop CPU errata This patch implements a workaround for the Loongson-2F nop in generated, code, if the existing option CONFIG_CPU_NOP_WORKAROUND is set. Before, the binutils option -mfix-loongson2f-nop was enabled, but no workaround was done when emitting MIPS code. Now, the nop pseudo instruction is emitted as "or ax,ax,zero" instead of the default "sll zero,zero,0". This is consistent with the workaround implemented by binutils. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Reviewed-by: Jiaxun Yang Link: https://sourceware.org/legacy-ml/binutils/2009-11/msg00387.html Link: https://lore.kernel.org/bpf/20211005165408.2305108-3-johan.almbladh@anyfinetworks.com --- arch/mips/include/asm/uasm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index 5efa4e2dc9abb..296bcf31abb57 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -249,7 +249,11 @@ static inline void uasm_l##lb(struct uasm_label **lab, u32 *addr) \ #define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off) #define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3) #define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b) +#ifdef CONFIG_CPU_NOP_WORKAROUNDS +#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0) +#else #define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0) +#endif #define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1) static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1, From eb63cfcd2ee8ec3805f6881f43341f589c3d2278 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 5 Oct 2021 18:54:04 +0200 Subject: [PATCH 017/181] mips, bpf: Add eBPF JIT for 32-bit MIPS This is an implementation of an eBPF JIT for 32-bit MIPS I-V and MIPS32. The implementation supports all 32-bit and 64-bit ALU and JMP operations, including the recently-added atomics. 64-bit div/mod and 64-bit atomics are implemented using function calls to math64 and atomic64 functions, respectively. All 32-bit operations are implemented natively by the JIT, except if the CPU lacks ll/sc instructions. Register mapping ================ All 64-bit eBPF registers are mapped to native 32-bit MIPS register pairs, and does not use any stack scratch space for register swapping. This means that all eBPF register data is kept in CPU registers all the time, and this simplifies the register management a lot. It also reduces the JIT's pressure on temporary registers since we do not have to move data around. Native register pairs are ordered according to CPU endiannes, following the O32 calling convention for passing 64-bit arguments and return values. The eBPF return value, arguments and callee-saved registers are mapped to their native MIPS equivalents. Since the 32 highest bits in the eBPF FP (frame pointer) register are always zero, only one general-purpose register is actually needed for the mapping. The MIPS fp register is used for this purpose. The high bits are mapped to MIPS register r0. This saves us one CPU register, which is much needed for temporaries, while still allowing us to treat the R10 (FP) register just like any other eBPF register in the JIT. The MIPS gp (global pointer) and at (assembler temporary) registers are used as internal temporary registers for constant blinding. CPU registers t6-t9 are used internally by the JIT when constructing more complex 64-bit operations. This is precisely what is needed - two registers to store an operand value, and two more as scratch registers when performing the operation. The register mapping is shown below. 
R0 - $v1, $v0 return value R1 - $a1, $a0 argument 1, passed in registers R2 - $a3, $a2 argument 2, passed in registers R3 - $t1, $t0 argument 3, passed on stack R4 - $t3, $t2 argument 4, passed on stack R5 - $t4, $t3 argument 5, passed on stack R6 - $s1, $s0 callee-saved R7 - $s3, $s2 callee-saved R8 - $s5, $s4 callee-saved R9 - $s7, $s6 callee-saved FP - $r0, $fp 32-bit frame pointer AX - $gp, $at constant-blinding $t6 - $t9 unallocated, JIT temporaries Jump offsets ============ The JIT tries to map all conditional JMP operations to MIPS conditional PC-relative branches. The MIPS branch offset field is 18 bits, in bytes, which is equivalent to the eBPF 16-bit instruction offset. However, since the JIT may emit more than one CPU instruction per eBPF instruction, the field width may overflow. If that happens, the JIT converts the long conditional jump to a short PC-relative branch with the condition inverted, jumping over a long unconditional absolute jmp (j). This conversion will change the instruction offset mapping used for jumps, and may in turn result in more branch offset overflows. The JIT therefore dry-runs the translation until no more branches are converted and the offsets do not change anymore. There is an upper bound on this of course, and if the JIT hits that limit, the last two iterations are run with all branches being converted. Tail call count =============== The current tail call count is stored in the 16-byte area of the caller's stack frame that is reserved for the callee in the o32 ABI. The value is initialized in the prologue, and propagated to the tail-callee by skipping the initialization instructions when emitting the tail call. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211005165408.2305108-4-johan.almbladh@anyfinetworks.com --- arch/mips/net/Makefile | 7 +- arch/mips/net/bpf_jit_comp.c | 1032 +++++++++++++++++ arch/mips/net/bpf_jit_comp.h | 211 ++++ arch/mips/net/bpf_jit_comp32.c | 1899 ++++++++++++++++++++++++++++++++ 4 files changed, 3148 insertions(+), 1 deletion(-) create mode 100644 arch/mips/net/bpf_jit_comp.c create mode 100644 arch/mips/net/bpf_jit_comp.h create mode 100644 arch/mips/net/bpf_jit_comp32.c diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index d55912349039c..e057ee4ba75e8 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -2,4 +2,9 @@ # MIPS networking code obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o -obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o + +ifeq ($(CONFIG_32BIT),y) + obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o +else + obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o +endif diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c new file mode 100644 index 0000000000000..7eb95fc577107 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp.c @@ -0,0 +1,1032 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Just-In-Time compiler for eBPF bytecode on MIPS. + * Implementation of JIT functions common to 32-bit and 64-bit CPUs. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. + * Copyright (c) 2017 Shubham Bansal + * Copyright (c) 2011 Mircea Gherzan + */ + +/* + * Code overview + * ============= + * + * - bpf_jit_comp.h + * Common definitions and utilities. + * + * - bpf_jit_comp.c + * Implementation of JIT top-level logic and exported JIT API functions. 
+ * Implementation of internal operations shared by 32-bit and 64-bit code. + * JMP and ALU JIT control code, register control code, shared ALU and + * JMP/JMP32 JIT operations. + * + * - bpf_jit_comp32.c + * Implementation of functions to JIT prologue, epilogue and a single eBPF + * instruction for 32-bit MIPS CPUs. The functions use shared operations + * where possible, and implement the rest for 32-bit MIPS such as ALU64 + * operations. + * + * - bpf_jit_comp64.c + * Ditto, for 64-bit MIPS CPUs. + * + * Zero and sign extension + * ======================== + * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension, + * but the eBPF instruction set mandates zero extension. We let the verifier + * insert explicit zero-extensions after 32-bit ALU operations, both for + * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPs + * are JITed with sign extensions inserted when so expected. + * + * ALU operations + * ============== + * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are + * JITed in the following steps. ALU64 operations on 32-bit MIPS are more + * complicated and therefore only processed by special implementations in + * step (3). + * + * 1) valid_alu_i: + * Determine if an immediate operation can be emitted as such, or if + * we must fall back to the register version. + * + * 2) rewrite_alu_i: + * Convert BPF operation and immediate value to a canonical form for + * JITing. In some degenerate cases this form may be a no-op. + * + * 3) emit_alu_{i,i64,r,64}: + * Emit instructions for an ALU or ALU64 immediate or register operation. + * + * JMP operations + * ============== + * JMP and JMP32 operations require an JIT instruction offset table for + * translating the jump offset. This table is computed by dry-running the + * JIT without actually emitting anything. However, the computed PC-relative + * offset may overflow the 18-bit offset field width of the native MIPS + * branch instruction. In such cases, the long jump is converted into the + * following sequence. + * + * ! +2 Inverted PC-relative branch + * nop Delay slot + * j Unconditional absolute long jump + * nop Delay slot + * + * Since this converted sequence alters the offset table, all offsets must + * be re-calculated. This may in turn trigger new branch conversions, so + * the process is repeated until no further changes are made. Normally it + * completes in 1-2 iterations. If JIT_MAX_ITERATIONS should reached, we + * fall back to converting every remaining jump operation. The branch + * conversion is independent of how the JMP or JMP32 condition is JITed. + * + * JMP32 and JMP operations are JITed as follows. + * + * 1) setup_jmp_{i,r}: + * Convert jump conditional and offset into a form that can be JITed. + * This form may be a no-op, a canonical form, or an inverted PC-relative + * jump if branch conversion is necessary. + * + * 2) valid_jmp_i: + * Determine if an immediate operations can be emitted as such, or if + * we must fall back to the register version. Applies to JMP32 for 32-bit + * MIPS, and both JMP and JMP32 for 64-bit MIPS. + * + * 3) emit_jmp_{i,i64,r,r64}: + * Emit instructions for an JMP or JMP32 immediate or register operation. + * + * 4) finish_jmp_{i,r}: + * Emit any instructions needed to finish the jump. This includes a nop + * for the delay slot if a branch was emitted, and a long absolute jump + * if the branch was converted. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit_comp.h" + +/* Convenience macros for descriptor access */ +#define CONVERTED(desc) ((desc) & JIT_DESC_CONVERT) +#define INDEX(desc) ((desc) & ~JIT_DESC_CONVERT) + +/* + * Push registers on the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be written is returned. + */ +int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) +{ + int reg; + + for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) + if (mask & BIT(reg)) { + if ((excl & BIT(reg)) == 0) { + if (sizeof(long) == 4) + emit(ctx, sw, reg, depth, MIPS_R_SP); + else /* sizeof(long) == 8 */ + emit(ctx, sd, reg, depth, MIPS_R_SP); + } + depth += sizeof(long); + } + + ctx->stack_used = max((int)ctx->stack_used, depth); + return depth; +} + +/* + * Pop registers from the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be read is returned. + */ +int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) +{ + int reg; + + for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) + if (mask & BIT(reg)) { + if ((excl & BIT(reg)) == 0) { + if (sizeof(long) == 4) + emit(ctx, lw, reg, depth, MIPS_R_SP); + else /* sizeof(long) == 8 */ + emit(ctx, ld, reg, depth, MIPS_R_SP); + } + depth += sizeof(long); + } + + return depth; +} + +/* Compute the 28-bit jump target address from a BPF program location */ +int get_target(struct jit_context *ctx, u32 loc) +{ + u32 index = INDEX(ctx->descriptors[loc]); + unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index]; + unsigned long addr = (unsigned long)&ctx->target[index]; + + if (!ctx->target) + return 0; + + if ((addr ^ pc) & ~MIPS_JMP_MASK) + return -1; + + return addr & MIPS_JMP_MASK; +} + +/* Compute the PC-relative offset to relative BPF program offset */ +int get_offset(const struct jit_context *ctx, int off) +{ + return (INDEX(ctx->descriptors[ctx->bpf_index + off]) - + ctx->jit_index - 1) * sizeof(u32); +} + +/* dst = imm (register width) */ +void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm) +{ + if (imm >= -0x8000 && imm <= 0x7fff) { + emit(ctx, addiu, dst, MIPS_R_ZERO, imm); + } else { + emit(ctx, lui, dst, (s16)((u32)imm >> 16)); + emit(ctx, ori, dst, dst, (u16)(imm & 0xffff)); + } + clobber_reg(ctx, dst); +} + +/* dst = src (register width) */ +void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src) +{ + emit(ctx, ori, dst, src, 0); + clobber_reg(ctx, dst); +} + +/* Validate ALU immediate range */ +bool valid_alu_i(u8 op, s32 imm) +{ + switch (BPF_OP(op)) { + case BPF_NEG: + case BPF_LSH: + case BPF_RSH: + case BPF_ARSH: + /* All legal eBPF values are valid */ + return true; + case BPF_ADD: + /* imm must be 16 bits */ + return imm >= -0x8000 && imm <= 0x7fff; + case BPF_SUB: + /* -imm must be 16 bits */ + return imm >= -0x7fff && imm <= 0x8000; + case BPF_AND: + case BPF_OR: + case BPF_XOR: + /* imm must be 16 bits unsigned */ + return imm >= 0 && imm <= 0xffff; + case BPF_MUL: + /* imm must be zero or a positive power of two */ + return imm == 0 || (imm > 0 && is_power_of_2(imm)); + case BPF_DIV: + case BPF_MOD: + /* imm must be an 17-bit power of two */ + return (u32)imm <= 0x10000 && is_power_of_2((u32)imm); + } + return false; +} + +/* Rewrite ALU immediate operation */ +bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val) +{ + bool act = true; + + switch (BPF_OP(op)) { + case BPF_LSH: + case BPF_RSH: + case BPF_ARSH: + 
case BPF_ADD: + case BPF_SUB: + case BPF_OR: + case BPF_XOR: + /* imm == 0 is a no-op */ + act = imm != 0; + break; + case BPF_MUL: + if (imm == 1) { + /* dst * 1 is a no-op */ + act = false; + } else if (imm == 0) { + /* dst * 0 is dst & 0 */ + op = BPF_AND; + } else { + /* dst * (1 << n) is dst << n */ + op = BPF_LSH; + imm = ilog2(abs(imm)); + } + break; + case BPF_DIV: + if (imm == 1) { + /* dst / 1 is a no-op */ + act = false; + } else { + /* dst / (1 << n) is dst >> n */ + op = BPF_RSH; + imm = ilog2(imm); + } + break; + case BPF_MOD: + /* dst % (1 << n) is dst & ((1 << n) - 1) */ + op = BPF_AND; + imm--; + break; + } + + *alu = op; + *val = imm; + return act; +} + +/* ALU immediate operation (32-bit) */ +void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = -dst */ + case BPF_NEG: + emit(ctx, subu, dst, MIPS_R_ZERO, dst); + break; + /* dst = dst & imm */ + case BPF_AND: + emit(ctx, andi, dst, dst, (u16)imm); + break; + /* dst = dst | imm */ + case BPF_OR: + emit(ctx, ori, dst, dst, (u16)imm); + break; + /* dst = dst ^ imm */ + case BPF_XOR: + emit(ctx, xori, dst, dst, (u16)imm); + break; + /* dst = dst << imm */ + case BPF_LSH: + emit(ctx, sll, dst, dst, imm); + break; + /* dst = dst >> imm */ + case BPF_RSH: + emit(ctx, srl, dst, dst, imm); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ARSH: + emit(ctx, sra, dst, dst, imm); + break; + /* dst = dst + imm */ + case BPF_ADD: + emit(ctx, addiu, dst, dst, imm); + break; + /* dst = dst - imm */ + case BPF_SUB: + emit(ctx, addiu, dst, dst, -imm); + break; + } + clobber_reg(ctx, dst); +} + +/* ALU register operation (32-bit) */ +void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst & src */ + case BPF_AND: + emit(ctx, and, dst, dst, src); + break; + /* dst = dst | src */ + case BPF_OR: + emit(ctx, or, dst, dst, src); + break; + /* dst = dst ^ src */ + case BPF_XOR: + emit(ctx, xor, dst, dst, src); + break; + /* dst = dst << src */ + case BPF_LSH: + emit(ctx, sllv, dst, dst, src); + break; + /* dst = dst >> src */ + case BPF_RSH: + emit(ctx, srlv, dst, dst, src); + break; + /* dst = dst >> src (arithmetic) */ + case BPF_ARSH: + emit(ctx, srav, dst, dst, src); + break; + /* dst = dst + src */ + case BPF_ADD: + emit(ctx, addu, dst, dst, src); + break; + /* dst = dst - src */ + case BPF_SUB: + emit(ctx, subu, dst, dst, src); + break; + /* dst = dst * src */ + case BPF_MUL: + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, dst, dst, src); + } else { + emit(ctx, multu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst / src */ + case BPF_DIV: + if (cpu_has_mips32r6) { + emit(ctx, divu_r6, dst, dst, src); + } else { + emit(ctx, divu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst % src */ + case BPF_MOD: + if (cpu_has_mips32r6) { + emit(ctx, modu, dst, dst, src); + } else { + emit(ctx, divu, dst, src); + emit(ctx, mfhi, dst); + } + break; + } + clobber_reg(ctx, dst); +} + +/* Atomic read-modify-write (32-bit) */ +void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) +{ + emit(ctx, ll, MIPS_R_T9, off, dst); + switch (code) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + emit(ctx, xor, 
MIPS_R_T8, MIPS_R_T9, src); + break; + case BPF_XCHG: + emit(ctx, move, MIPS_R_T8, src); + break; + } + emit(ctx, sc, MIPS_R_T8, off, dst); + emit(ctx, beqz, MIPS_R_T8, -16); + emit(ctx, nop); /* Delay slot */ + + if (code & BPF_FETCH) { + emit(ctx, move, src, MIPS_R_T9); + clobber_reg(ctx, src); + } +} + +/* Atomic compare-and-exchange (32-bit) */ +void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off) +{ + emit(ctx, ll, MIPS_R_T9, off, dst); + emit(ctx, bne, MIPS_R_T9, res, 12); + emit(ctx, move, MIPS_R_T8, src); /* Delay slot */ + emit(ctx, sc, MIPS_R_T8, off, dst); + emit(ctx, beqz, MIPS_R_T8, -20); + emit(ctx, move, res, MIPS_R_T9); /* Delay slot */ + clobber_reg(ctx, res); +} + +/* Swap bytes and truncate a register word or half word */ +void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width) +{ + u8 tmp = MIPS_R_T8; + u8 msk = MIPS_R_T9; + + switch (width) { + /* Swap bytes in a word */ + case 32: + if (cpu_has_mips32r2 || cpu_has_mips32r6) { + emit(ctx, wsbh, dst, dst); + emit(ctx, rotr, dst, dst, 16); + } else { + emit(ctx, sll, tmp, dst, 16); /* tmp = dst << 16 */ + emit(ctx, srl, dst, dst, 16); /* dst = dst >> 16 */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ + + emit(ctx, lui, msk, 0xff); /* msk = 0x00ff0000 */ + emit(ctx, ori, msk, msk, 0xff); /* msk = msk | 0xff */ + + emit(ctx, and, tmp, dst, msk); /* tmp = dst & msk */ + emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */ + emit(ctx, srl, dst, dst, 8); /* dst = dst >> 8 */ + emit(ctx, and, dst, dst, msk); /* dst = dst & msk */ + emit(ctx, or, dst, dst, tmp); /* reg = dst | tmp */ + } + break; + /* Swap bytes in a half word */ + case 16: + if (cpu_has_mips32r2 || cpu_has_mips32r6) { + emit(ctx, wsbh, dst, dst); + emit(ctx, andi, dst, dst, 0xffff); + } else { + emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */ + emit(ctx, srl, tmp, tmp, 8); /* t = t >> 8 */ + emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */ + emit(ctx, sll, dst, dst, 8); /* d = d << 8 */ + emit(ctx, or, dst, dst, tmp); /* d = d | t */ + } + break; + } + clobber_reg(ctx, dst); +} + +/* Validate jump immediate range */ +bool valid_jmp_i(u8 op, s32 imm) +{ + switch (op) { + case JIT_JNOP: + /* Immediate value not used */ + return true; + case BPF_JEQ: + case BPF_JNE: + /* No immediate operation */ + return false; + case BPF_JSET: + case JIT_JNSET: + /* imm must be 16 bits unsigned */ + return imm >= 0 && imm <= 0xffff; + case BPF_JGE: + case BPF_JLT: + case BPF_JSGE: + case BPF_JSLT: + /* imm must be 16 bits */ + return imm >= -0x8000 && imm <= 0x7fff; + case BPF_JGT: + case BPF_JLE: + case BPF_JSGT: + case BPF_JSLE: + /* imm + 1 must be 16 bits */ + return imm >= -0x8001 && imm <= 0x7ffe; + } + return false; +} + +/* Invert a conditional jump operation */ +static u8 invert_jmp(u8 op) +{ + switch (op) { + case BPF_JA: return JIT_JNOP; + case BPF_JEQ: return BPF_JNE; + case BPF_JNE: return BPF_JEQ; + case BPF_JSET: return JIT_JNSET; + case BPF_JGT: return BPF_JLE; + case BPF_JGE: return BPF_JLT; + case BPF_JLT: return BPF_JGE; + case BPF_JLE: return BPF_JGT; + case BPF_JSGT: return BPF_JSLE; + case BPF_JSGE: return BPF_JSLT; + case BPF_JSLT: return BPF_JSGE; + case BPF_JSLE: return BPF_JSGT; + } + return 0; +} + +/* Prepare a PC-relative jump operation */ +static void setup_jmp(struct jit_context *ctx, u8 bpf_op, + s16 bpf_off, u8 *jit_op, s32 *jit_off) +{ + u32 *descp = &ctx->descriptors[ctx->bpf_index]; + int op = bpf_op; + int offset = 0; + + /* Do not compute offsets on the first pass */ + if (INDEX(*descp) == 
0) + goto done; + + /* Skip jumps never taken */ + if (bpf_op == JIT_JNOP) + goto done; + + /* Convert jumps always taken */ + if (bpf_op == BPF_JA) + *descp |= JIT_DESC_CONVERT; + + /* + * Current ctx->jit_index points to the start of the branch preamble. + * Since the preamble differs among different branch conditionals, + * the current index cannot be used to compute the branch offset. + * Instead, we use the offset table value for the next instruction, + * which gives the index immediately after the branch delay slot. + */ + if (!CONVERTED(*descp)) { + int target = ctx->bpf_index + bpf_off + 1; + int origin = ctx->bpf_index + 1; + + offset = (INDEX(ctx->descriptors[target]) - + INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32); + } + + /* + * The PC-relative branch offset field on MIPS is 18 bits signed, + * so if the computed offset is larger than this we generate a an + * absolute jump that we skip with an inverted conditional branch. + */ + if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) { + offset = 3 * sizeof(u32); + op = invert_jmp(bpf_op); + ctx->changes += !CONVERTED(*descp); + *descp |= JIT_DESC_CONVERT; + } + +done: + *jit_off = offset; + *jit_op = op; +} + +/* Prepare a PC-relative jump operation with immediate conditional */ +void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) +{ + bool always = false; + bool never = false; + + switch (bpf_op) { + case BPF_JEQ: + case BPF_JNE: + break; + case BPF_JSET: + case BPF_JLT: + never = imm == 0; + break; + case BPF_JGE: + always = imm == 0; + break; + case BPF_JGT: + never = (u32)imm == U32_MAX; + break; + case BPF_JLE: + always = (u32)imm == U32_MAX; + break; + case BPF_JSGT: + never = imm == S32_MAX && width == 32; + break; + case BPF_JSGE: + always = imm == S32_MIN && width == 32; + break; + case BPF_JSLT: + never = imm == S32_MIN && width == 32; + break; + case BPF_JSLE: + always = imm == S32_MAX && width == 32; + break; + } + + if (never) + bpf_op = JIT_JNOP; + if (always) + bpf_op = BPF_JA; + setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); +} + +/* Prepare a PC-relative jump operation with register conditional */ +void setup_jmp_r(struct jit_context *ctx, bool same_reg, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) +{ + switch (bpf_op) { + case BPF_JSET: + break; + case BPF_JEQ: + case BPF_JGE: + case BPF_JLE: + case BPF_JSGE: + case BPF_JSLE: + if (same_reg) + bpf_op = BPF_JA; + break; + case BPF_JNE: + case BPF_JLT: + case BPF_JGT: + case BPF_JSGT: + case BPF_JSLT: + if (same_reg) + bpf_op = JIT_JNOP; + break; + } + setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); +} + +/* Finish a PC-relative jump operation */ +int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off) +{ + /* Emit conditional branch delay slot */ + if (jit_op != JIT_JNOP) + emit(ctx, nop); + /* + * Emit an absolute long jump with delay slot, + * if the PC-relative branch was converted. 
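+ * The branch condition was inverted by setup_jmp() in that case, so the
+ * emitted PC-relative branch skips this j/nop pair and the absolute jump
+ * is only taken when the original BPF condition holds.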
+ */ + if (CONVERTED(ctx->descriptors[ctx->bpf_index])) { + int target = get_target(ctx, ctx->bpf_index + bpf_off + 1); + + if (target < 0) + return -1; + emit(ctx, j, target); + emit(ctx, nop); + } + return 0; +} + +/* Jump immediate (32-bit) */ +void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op) +{ + switch (op) { + /* No-op, used internally for branch optimization */ + case JIT_JNOP: + break; + /* PC += off if dst & imm */ + case BPF_JSET: + emit(ctx, andi, MIPS_R_T9, dst, (u16)imm); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case JIT_JNSET: + emit(ctx, andi, MIPS_R_T9, dst, (u16)imm); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst > imm */ + case BPF_JGT: + emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst >= imm */ + case BPF_JGE: + emit(ctx, sltiu, MIPS_R_T9, dst, imm); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst < imm */ + case BPF_JLT: + emit(ctx, sltiu, MIPS_R_T9, dst, imm); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst <= imm */ + case BPF_JLE: + emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst > imm (signed) */ + case BPF_JSGT: + emit(ctx, slti, MIPS_R_T9, dst, imm + 1); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst >= imm (signed) */ + case BPF_JSGE: + emit(ctx, slti, MIPS_R_T9, dst, imm); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst < imm (signed) */ + case BPF_JSLT: + emit(ctx, slti, MIPS_R_T9, dst, imm); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst <= imm (signed) */ + case BPF_JSLE: + emit(ctx, slti, MIPS_R_T9, dst, imm + 1); + emit(ctx, bnez, MIPS_R_T9, off); + break; + } +} + +/* Jump register (32-bit) */ +void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op) +{ + switch (op) { + /* No-op, used internally for branch optimization */ + case JIT_JNOP: + break; + /* PC += off if dst == src */ + case BPF_JEQ: + emit(ctx, beq, dst, src, off); + break; + /* PC += off if dst != src */ + case BPF_JNE: + emit(ctx, bne, dst, src, off); + break; + /* PC += off if dst & src */ + case BPF_JSET: + emit(ctx, and, MIPS_R_T9, dst, src); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case JIT_JNSET: + emit(ctx, and, MIPS_R_T9, dst, src); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst > src */ + case BPF_JGT: + emit(ctx, sltu, MIPS_R_T9, src, dst); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst >= src */ + case BPF_JGE: + emit(ctx, sltu, MIPS_R_T9, dst, src); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst < src */ + case BPF_JLT: + emit(ctx, sltu, MIPS_R_T9, dst, src); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst <= src */ + case BPF_JLE: + emit(ctx, sltu, MIPS_R_T9, src, dst); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst > src (signed) */ + case BPF_JSGT: + emit(ctx, slt, MIPS_R_T9, src, dst); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst >= src (signed) */ + case BPF_JSGE: + emit(ctx, slt, MIPS_R_T9, dst, src); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst < src (signed) */ + case BPF_JSLT: + emit(ctx, slt, MIPS_R_T9, dst, src); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst <= src (signed) */ + case BPF_JSLE: + 
emit(ctx, slt, MIPS_R_T9, src, dst); + emit(ctx, beqz, MIPS_R_T9, off); + break; + } +} + +/* Jump always */ +int emit_ja(struct jit_context *ctx, s16 off) +{ + int target = get_target(ctx, ctx->bpf_index + off + 1); + + if (target < 0) + return -1; + emit(ctx, j, target); + emit(ctx, nop); + return 0; +} + +/* Jump to epilogue */ +int emit_exit(struct jit_context *ctx) +{ + int target = get_target(ctx, ctx->program->len); + + if (target < 0) + return -1; + emit(ctx, j, target); + emit(ctx, nop); + return 0; +} + +/* Build the program body from eBPF bytecode */ +static int build_body(struct jit_context *ctx) +{ + const struct bpf_prog *prog = ctx->program; + unsigned int i; + + ctx->stack_used = 0; + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + u32 *descp = &ctx->descriptors[i]; + int ret; + + access_reg(ctx, insn->src_reg); + access_reg(ctx, insn->dst_reg); + + ctx->bpf_index = i; + if (ctx->target == NULL) { + ctx->changes += INDEX(*descp) != ctx->jit_index; + *descp &= JIT_DESC_CONVERT; + *descp |= ctx->jit_index; + } + + ret = build_insn(insn, ctx); + if (ret < 0) + return ret; + + if (ret > 0) { + i++; + if (ctx->target == NULL) + descp[1] = ctx->jit_index; + } + } + + /* Store the end offset, where the epilogue begins */ + ctx->descriptors[prog->len] = ctx->jit_index; + return 0; +} + +/* Set the branch conversion flag on all instructions */ +static void set_convert_flag(struct jit_context *ctx, bool enable) +{ + const struct bpf_prog *prog = ctx->program; + u32 flag = enable ? JIT_DESC_CONVERT : 0; + unsigned int i; + + for (i = 0; i <= prog->len; i++) + ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag; +} + +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *p; + + /* We are guaranteed to have aligned memory. */ + for (p = area; size >= sizeof(u32); size -= sizeof(u32)) + uasm_i_break(&p, BRK_BUG); /* Increments p */ +} + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header = NULL; + struct jit_context ctx; + bool tmp_blinded = false; + unsigned int tmp_idx; + unsigned int image_size; + u8 *image_ptr; + int tries; + + /* + * If BPF JIT was not enabled then we must fall back to + * the interpreter. + */ + if (!prog->jit_requested) + return orig_prog; + /* + * If constant blinding was enabled and we failed during blinding + * then we must fall back to the interpreter. Otherwise, we save + * the new JITed code. + */ + tmp = bpf_jit_blind_constants(prog); + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.program = prog; + + /* + * Not able to allocate memory for descriptors[], then + * we must fall back to the interpreter + */ + ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors), + GFP_KERNEL); + if (ctx.descriptors == NULL) + goto out_err; + + /* First pass discovers used resources */ + if (build_body(&ctx) < 0) + goto out_err; + /* + * Second pass computes instruction offsets. + * If any PC-relative branches are out of range, a sequence of + * a PC-relative branch + a jump is generated, and we have to + * try again from the beginning to generate the new offsets. + * This is done until no additional conversions are necessary. + * The last two iterations are done with all branches being + * converted, to guarantee offset table convergence within a + * fixed number of iterations. 
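+ * As an example of why iteration is needed: converting a single
+ * out-of-range branch adds two instructions (the absolute jump and its
+ * delay slot), which shifts all later JIT offsets and may in turn push
+ * other branches out of range on the following iteration.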
+ */ + ctx.jit_index = 0; + build_prologue(&ctx); + tmp_idx = ctx.jit_index; + + tries = JIT_MAX_ITERATIONS; + do { + ctx.jit_index = tmp_idx; + ctx.changes = 0; + if (tries == 2) + set_convert_flag(&ctx, true); + if (build_body(&ctx) < 0) + goto out_err; + } while (ctx.changes > 0 && --tries > 0); + + if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge")) + goto out_err; + + build_epilogue(&ctx, MIPS_R_RA); + + /* Now we know the size of the structure to make */ + image_size = sizeof(u32) * ctx.jit_index; + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + /* + * Not able to allocate memory for the structure then + * we must fall back to the interpretation + */ + if (header == NULL) + goto out_err; + + /* Actual pass to generate final JIT code */ + ctx.target = (u32 *)image_ptr; + ctx.jit_index = 0; + + /* + * If building the JITed code fails somehow, + * we fall back to the interpretation. + */ + build_prologue(&ctx); + if (build_body(&ctx) < 0) + goto out_err; + build_epilogue(&ctx, MIPS_R_RA); + + /* Populate line info meta data */ + set_convert_flag(&ctx, false); + bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]); + + /* Set as read-only exec and flush instruction cache */ + bpf_jit_binary_lock_ro(header); + flush_icache_range((unsigned long)header, + (unsigned long)&ctx.target[ctx.jit_index]); + + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, 2, ctx.target); + + prog->bpf_func = (void *)ctx.target; + prog->jited = 1; + prog->jited_len = image_size; + +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + kfree(ctx.descriptors); + return prog; + +out_err: + prog = orig_prog; + if (header) + bpf_jit_binary_free(header); + goto out; +} diff --git a/arch/mips/net/bpf_jit_comp.h b/arch/mips/net/bpf_jit_comp.h new file mode 100644 index 0000000000000..44787cf377dd0 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Just-In-Time compiler for eBPF bytecode on 32-bit and 64-bit MIPS. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. 
+ * Copyright (c) 2017 Shubham Bansal + * Copyright (c) 2011 Mircea Gherzan + */ + +#ifndef _BPF_JIT_COMP_H +#define _BPF_JIT_COMP_H + +/* MIPS registers */ +#define MIPS_R_ZERO 0 /* Const zero */ +#define MIPS_R_AT 1 /* Asm temp */ +#define MIPS_R_V0 2 /* Result */ +#define MIPS_R_V1 3 /* Result */ +#define MIPS_R_A0 4 /* Argument */ +#define MIPS_R_A1 5 /* Argument */ +#define MIPS_R_A2 6 /* Argument */ +#define MIPS_R_A3 7 /* Argument */ +#define MIPS_R_A4 8 /* Arg (n64) */ +#define MIPS_R_A5 9 /* Arg (n64) */ +#define MIPS_R_A6 10 /* Arg (n64) */ +#define MIPS_R_A7 11 /* Arg (n64) */ +#define MIPS_R_T0 8 /* Temp (o32) */ +#define MIPS_R_T1 9 /* Temp (o32) */ +#define MIPS_R_T2 10 /* Temp (o32) */ +#define MIPS_R_T3 11 /* Temp (o32) */ +#define MIPS_R_T4 12 /* Temporary */ +#define MIPS_R_T5 13 /* Temporary */ +#define MIPS_R_T6 14 /* Temporary */ +#define MIPS_R_T7 15 /* Temporary */ +#define MIPS_R_S0 16 /* Saved */ +#define MIPS_R_S1 17 /* Saved */ +#define MIPS_R_S2 18 /* Saved */ +#define MIPS_R_S3 19 /* Saved */ +#define MIPS_R_S4 20 /* Saved */ +#define MIPS_R_S5 21 /* Saved */ +#define MIPS_R_S6 22 /* Saved */ +#define MIPS_R_S7 23 /* Saved */ +#define MIPS_R_T8 24 /* Temporary */ +#define MIPS_R_T9 25 /* Temporary */ +/* MIPS_R_K0 26 Reserved */ +/* MIPS_R_K1 27 Reserved */ +#define MIPS_R_GP 28 /* Global ptr */ +#define MIPS_R_SP 29 /* Stack ptr */ +#define MIPS_R_FP 30 /* Frame ptr */ +#define MIPS_R_RA 31 /* Return */ + +/* + * Jump address mask for immediate jumps. The four most significant bits + * must be equal to PC. + */ +#define MIPS_JMP_MASK 0x0fffffffUL + +/* Maximum number of iterations in offset table computation */ +#define JIT_MAX_ITERATIONS 8 + +/* + * Jump pseudo-instructions used internally + * for branch conversion and branch optimization. + */ +#define JIT_JNSET 0xe0 +#define JIT_JNOP 0xf0 + +/* Descriptor flag for PC-relative branch conversion */ +#define JIT_DESC_CONVERT BIT(31) + +/* JIT context for an eBPF program */ +struct jit_context { + struct bpf_prog *program; /* The eBPF program being JITed */ + u32 *descriptors; /* eBPF to JITed CPU insn descriptors */ + u32 *target; /* JITed code buffer */ + u32 bpf_index; /* Index of current BPF program insn */ + u32 jit_index; /* Index of current JIT target insn */ + u32 changes; /* Number of PC-relative branch conv */ + u32 accessed; /* Bit mask of read eBPF registers */ + u32 clobbered; /* Bit mask of modified CPU registers */ + u32 stack_size; /* Total allocated stack size in bytes */ + u32 saved_size; /* Size of callee-saved registers */ + u32 stack_used; /* Stack size used for function calls */ +}; + +/* Emit the instruction if the JIT memory space has been allocated */ +#define emit(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->jit_index]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->jit_index++; \ +} while (0) + +/* + * Mark a BPF register as accessed, it needs to be + * initialized by the program if expected, e.g. FP. + */ +static inline void access_reg(struct jit_context *ctx, u8 reg) +{ + ctx->accessed |= BIT(reg); +} + +/* + * Mark a CPU register as clobbered, it needs to be + * saved/restored by the program if callee-saved. + */ +static inline void clobber_reg(struct jit_context *ctx, u8 reg) +{ + ctx->clobbered |= BIT(reg); +} + +/* + * Push registers on the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be written is returned. 
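+ * For example, the 32-bit prologue stores the clobbered callee-saved
+ * registers with push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
+ * stack - saved).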
+ */ +int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth); + +/* + * Pop registers from the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be read is returned. + */ +int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth); + +/* Compute the 28-bit jump target address from a BPF program location */ +int get_target(struct jit_context *ctx, u32 loc); + +/* Compute the PC-relative offset to relative BPF program offset */ +int get_offset(const struct jit_context *ctx, int off); + +/* dst = imm (32-bit) */ +void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm); + +/* dst = src (32-bit) */ +void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src); + +/* Validate ALU/ALU64 immediate range */ +bool valid_alu_i(u8 op, s32 imm); + +/* Rewrite ALU/ALU64 immediate operation */ +bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val); + +/* ALU immediate operation (32-bit) */ +void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op); + +/* ALU register operation (32-bit) */ +void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op); + +/* Atomic read-modify-write (32-bit) */ +void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code); + +/* Atomic compare-and-exchange (32-bit) */ +void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off); + +/* Swap bytes and truncate a register word or half word */ +void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width); + +/* Validate JMP/JMP32 immediate range */ +bool valid_jmp_i(u8 op, s32 imm); + +/* Prepare a PC-relative jump operation with immediate conditional */ +void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off); + +/* Prepare a PC-relative jump operation with register conditional */ +void setup_jmp_r(struct jit_context *ctx, bool same_reg, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off); + +/* Finish a PC-relative jump operation */ +int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off); + +/* Conditional JMP/JMP32 immediate */ +void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op); + +/* Conditional JMP/JMP32 register */ +void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op); + +/* Jump always */ +int emit_ja(struct jit_context *ctx, s16 off); + +/* Jump to epilogue */ +int emit_exit(struct jit_context *ctx); + +/* + * Build program prologue to set up the stack and registers. + * This function is implemented separately for 32-bit and 64-bit JITs. + */ +void build_prologue(struct jit_context *ctx); + +/* + * Build the program epilogue to restore the stack and registers. + * This function is implemented separately for 32-bit and 64-bit JITs. + */ +void build_epilogue(struct jit_context *ctx, int dest_reg); + +/* + * Convert an eBPF instruction to native instruction, i.e + * JITs an eBPF instruction. + * Returns : + * 0 - Successfully JITed an 8-byte eBPF instruction + * >0 - Successfully JITed a 16-byte eBPF instruction + * <0 - Failed to JIT. + * This function is implemented separately for 32-bit and 64-bit JITs. 
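+ * The only 16-byte eBPF instruction is the 64-bit immediate load
+ * (BPF_LD | BPF_DW | BPF_IMM), so a return value > 0 tells build_body()
+ * to skip the second instruction slot.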
+ */ +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx); + +#endif /* _BPF_JIT_COMP_H */ diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c new file mode 100644 index 0000000000000..9d7041a2e5d73 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp32.c @@ -0,0 +1,1899 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Just-In-Time compiler for eBPF bytecode on MIPS. + * Implementation of JIT functions for 32-bit CPUs. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. + * Copyright (c) 2017 Shubham Bansal + * Copyright (c) 2011 Mircea Gherzan + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit_comp.h" + +/* MIPS a4-a7 are not available in the o32 ABI */ +#undef MIPS_R_A4 +#undef MIPS_R_A5 +#undef MIPS_R_A6 +#undef MIPS_R_A7 + +/* Stack is 8-byte aligned in o32 ABI */ +#define MIPS_STACK_ALIGNMENT 8 + +/* + * The top 16 bytes of a stack frame is reserved for the callee in O32 ABI. + * This corresponds to stack space for register arguments a0-a3. + */ +#define JIT_RESERVED_STACK 16 + +/* Temporary 64-bit register used by JIT */ +#define JIT_REG_TMP MAX_BPF_JIT_REG + +/* + * Number of prologue bytes to skip when doing a tail call. + * Tail call count (TCC) initialization (8 bytes) always, plus + * R0-to-v0 assignment (4 bytes) if big endian. + */ +#ifdef __BIG_ENDIAN +#define JIT_TCALL_SKIP 12 +#else +#define JIT_TCALL_SKIP 8 +#endif + +/* CPU registers holding the callee return value */ +#define JIT_RETURN_REGS \ + (BIT(MIPS_R_V0) | \ + BIT(MIPS_R_V1)) + +/* CPU registers arguments passed to callee directly */ +#define JIT_ARG_REGS \ + (BIT(MIPS_R_A0) | \ + BIT(MIPS_R_A1) | \ + BIT(MIPS_R_A2) | \ + BIT(MIPS_R_A3)) + +/* CPU register arguments passed to callee on stack */ +#define JIT_STACK_REGS \ + (BIT(MIPS_R_T0) | \ + BIT(MIPS_R_T1) | \ + BIT(MIPS_R_T2) | \ + BIT(MIPS_R_T3) | \ + BIT(MIPS_R_T4) | \ + BIT(MIPS_R_T5)) + +/* Caller-saved CPU registers */ +#define JIT_CALLER_REGS \ + (JIT_RETURN_REGS | \ + JIT_ARG_REGS | \ + JIT_STACK_REGS) + +/* Callee-saved CPU registers */ +#define JIT_CALLEE_REGS \ + (BIT(MIPS_R_S0) | \ + BIT(MIPS_R_S1) | \ + BIT(MIPS_R_S2) | \ + BIT(MIPS_R_S3) | \ + BIT(MIPS_R_S4) | \ + BIT(MIPS_R_S5) | \ + BIT(MIPS_R_S6) | \ + BIT(MIPS_R_S7) | \ + BIT(MIPS_R_GP) | \ + BIT(MIPS_R_FP) | \ + BIT(MIPS_R_RA)) + +/* + * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers. + * + * 1) Native register pairs are ordered according to CPU endiannes, following + * the MIPS convention for passing 64-bit arguments and return values. + * 2) The eBPF return value, arguments and callee-saved registers are mapped + * to their native MIPS equivalents. + * 3) Since the 32 highest bits in the eBPF FP register are always zero, + * only one general-purpose register is actually needed for the mapping. + * We use the fp register for this purpose, and map the highest bits to + * the MIPS register r0 (zero). + * 4) We use the MIPS gp and at registers as internal temporary registers + * for constant blinding. The gp register is callee-saved. + * 5) One 64-bit temporary register is mapped for use when sign-extending + * immediate operands. MIPS registers t6-t9 are available to the JIT + * for as temporaries when implementing complex 64-bit operations. + * + * With this scheme all eBPF registers are being mapped to native MIPS + * registers without having to use any stack scratch space. 
The direct + * register mapping (2) simplifies the handling of function calls. + */ +static const u8 bpf2mips32[][2] = { + /* Return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0}, + /* Arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0}, + [BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2}, + /* Remaining arguments, to be passed on the stack per O32 ABI */ + [BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0}, + [BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2}, + [BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4}, + /* Callee-saved registers that in-kernel function will preserve */ + [BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0}, + [BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2}, + [BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4}, + [BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6}, + /* Read-only frame pointer to access the eBPF stack */ +#ifdef __BIG_ENDIAN + [BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO}, +#else + [BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP}, +#endif + /* Temporary register for blinding constants */ + [BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT}, + /* Temporary register for internal JIT use */ + [JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6}, +}; + +/* Get low CPU register for a 64-bit eBPF register mapping */ +static inline u8 lo(const u8 reg[]) +{ +#ifdef __BIG_ENDIAN + return reg[0]; +#else + return reg[1]; +#endif +} + +/* Get high CPU register for a 64-bit eBPF register mapping */ +static inline u8 hi(const u8 reg[]) +{ +#ifdef __BIG_ENDIAN + return reg[1]; +#else + return reg[0]; +#endif +} + +/* + * Mark a 64-bit CPU register pair as clobbered, it needs to be + * saved/restored by the program if callee-saved. + */ +static void clobber_reg64(struct jit_context *ctx, const u8 reg[]) +{ + clobber_reg(ctx, reg[0]); + clobber_reg(ctx, reg[1]); +} + +/* dst = imm (sign-extended) */ +static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm) +{ + emit_mov_i(ctx, lo(dst), imm); + if (imm < 0) + emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1); + else + emit(ctx, move, hi(dst), MIPS_R_ZERO); + clobber_reg64(ctx, dst); +} + +/* Zero extension, if verifier does not do it for us */ +static void emit_zext_ver(struct jit_context *ctx, const u8 dst[]) +{ + if (!ctx->program->aux->verifier_zext) { + emit(ctx, move, hi(dst), MIPS_R_ZERO); + clobber_reg(ctx, hi(dst)); + } +} + +/* Load delay slot, if ISA mandates it */ +static void emit_load_delay(struct jit_context *ctx) +{ + if (!cpu_has_mips_2_3_4_5_r) + emit(ctx, nop); +} + +/* ALU immediate operation (64-bit) */ +static void emit_alu_i64(struct jit_context *ctx, + const u8 dst[], s32 imm, u8 op) +{ + u8 src = MIPS_R_T6; + + /* + * ADD/SUB with all but the max negative imm can be handled by + * inverting the operation and the imm value, saving one insn. 
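+ * For example, an ALU64 BPF_ADD with imm == -4 is emitted as a
+ * subtraction of 4, avoiding the extra high-word adjustment needed for
+ * a sign-extended negative immediate.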
+ */ + if (imm > S32_MIN && imm < 0) + switch (op) { + case BPF_ADD: + op = BPF_SUB; + imm = -imm; + break; + case BPF_SUB: + op = BPF_ADD; + imm = -imm; + break; + } + + /* Move immediate to temporary register */ + emit_mov_i(ctx, src, imm); + + switch (op) { + /* dst = dst + imm */ + case BPF_ADD: + emit(ctx, addu, lo(dst), lo(dst), src); + emit(ctx, sltu, MIPS_R_T9, lo(dst), src); + emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9); + if (imm < 0) + emit(ctx, addiu, hi(dst), hi(dst), -1); + break; + /* dst = dst - imm */ + case BPF_SUB: + emit(ctx, sltu, MIPS_R_T9, lo(dst), src); + emit(ctx, subu, lo(dst), lo(dst), src); + emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); + if (imm < 0) + emit(ctx, addiu, hi(dst), hi(dst), 1); + break; + /* dst = dst | imm */ + case BPF_OR: + emit(ctx, or, lo(dst), lo(dst), src); + if (imm < 0) + emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1); + break; + /* dst = dst & imm */ + case BPF_AND: + emit(ctx, and, lo(dst), lo(dst), src); + if (imm >= 0) + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* dst = dst ^ imm */ + case BPF_XOR: + emit(ctx, xor, lo(dst), lo(dst), src); + if (imm < 0) { + emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst)); + emit(ctx, addiu, hi(dst), hi(dst), -1); + } + break; + } + clobber_reg64(ctx, dst); +} + +/* ALU register operation (64-bit) */ +static void emit_alu_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[], u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst + src */ + case BPF_ADD: + if (src == dst) { + emit(ctx, srl, MIPS_R_T9, lo(dst), 31); + emit(ctx, addu, lo(dst), lo(dst), lo(dst)); + } else { + emit(ctx, addu, lo(dst), lo(dst), lo(src)); + emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src)); + } + emit(ctx, addu, hi(dst), hi(dst), hi(src)); + emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9); + break; + /* dst = dst - src */ + case BPF_SUB: + emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src)); + emit(ctx, subu, lo(dst), lo(dst), lo(src)); + emit(ctx, subu, hi(dst), hi(dst), hi(src)); + emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); + break; + /* dst = dst | src */ + case BPF_OR: + emit(ctx, or, lo(dst), lo(dst), lo(src)); + emit(ctx, or, hi(dst), hi(dst), hi(src)); + break; + /* dst = dst & src */ + case BPF_AND: + emit(ctx, and, lo(dst), lo(dst), lo(src)); + emit(ctx, and, hi(dst), hi(dst), hi(src)); + break; + /* dst = dst ^ src */ + case BPF_XOR: + emit(ctx, xor, lo(dst), lo(dst), lo(src)); + emit(ctx, xor, hi(dst), hi(dst), hi(src)); + break; + } + clobber_reg64(ctx, dst); +} + +/* ALU invert (64-bit) */ +static void emit_neg_i64(struct jit_context *ctx, const u8 dst[]) +{ + emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst)); + emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst)); + emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst)); + emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); + + clobber_reg64(ctx, dst); +} + +/* ALU shift immediate (64-bit) */ +static void emit_shift_i64(struct jit_context *ctx, + const u8 dst[], u32 imm, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst << imm */ + case BPF_LSH: + if (imm < 32) { + emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm); + emit(ctx, sll, lo(dst), lo(dst), imm); + emit(ctx, sll, hi(dst), hi(dst), imm); + emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9); + } else { + emit(ctx, sll, hi(dst), lo(dst), imm - 32); + emit(ctx, move, lo(dst), MIPS_R_ZERO); + } + break; + /* dst = dst >> imm */ + case BPF_RSH: + if (imm < 32) { + emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm); + emit(ctx, srl, lo(dst), lo(dst), imm); + emit(ctx, srl, hi(dst), hi(dst), imm); + emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9); + } else { + 
emit(ctx, srl, lo(dst), hi(dst), imm - 32); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + } + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ARSH: + if (imm < 32) { + emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm); + emit(ctx, srl, lo(dst), lo(dst), imm); + emit(ctx, sra, hi(dst), hi(dst), imm); + emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9); + } else { + emit(ctx, sra, lo(dst), hi(dst), imm - 32); + emit(ctx, sra, hi(dst), hi(dst), 31); + } + break; + } + clobber_reg64(ctx, dst); +} + +/* ALU shift register (64-bit) */ +static void emit_shift_r64(struct jit_context *ctx, + const u8 dst[], u8 src, u8 op) +{ + u8 t1 = MIPS_R_T8; + u8 t2 = MIPS_R_T9; + + emit(ctx, andi, t1, src, 32); /* t1 = src & 32 */ + emit(ctx, beqz, t1, 16); /* PC += 16 if t1 == 0 */ + emit(ctx, nor, t2, src, MIPS_R_ZERO); /* t2 = ~src (delay slot) */ + + switch (BPF_OP(op)) { + /* dst = dst << src */ + case BPF_LSH: + /* Next: shift >= 32 */ + emit(ctx, sllv, hi(dst), lo(dst), src); /* dh = dl << src */ + emit(ctx, move, lo(dst), MIPS_R_ZERO); /* dl = 0 */ + emit(ctx, b, 20); /* PC += 20 */ + /* +16: shift < 32 */ + emit(ctx, srl, t1, lo(dst), 1); /* t1 = dl >> 1 */ + emit(ctx, srlv, t1, t1, t2); /* t1 = t1 >> t2 */ + emit(ctx, sllv, lo(dst), lo(dst), src); /* dl = dl << src */ + emit(ctx, sllv, hi(dst), hi(dst), src); /* dh = dh << src */ + emit(ctx, or, hi(dst), hi(dst), t1); /* dh = dh | t1 */ + break; + /* dst = dst >> src */ + case BPF_RSH: + /* Next: shift >= 32 */ + emit(ctx, srlv, lo(dst), hi(dst), src); /* dl = dh >> src */ + emit(ctx, move, hi(dst), MIPS_R_ZERO); /* dh = 0 */ + emit(ctx, b, 20); /* PC += 20 */ + /* +16: shift < 32 */ + emit(ctx, sll, t1, hi(dst), 1); /* t1 = dl << 1 */ + emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */ + emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */ + emit(ctx, srlv, hi(dst), hi(dst), src); /* dh = dh >> src */ + emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */ + break; + /* dst = dst >> src (arithmetic) */ + case BPF_ARSH: + /* Next: shift >= 32 */ + emit(ctx, srav, lo(dst), hi(dst), src); /* dl = dh >>a src */ + emit(ctx, sra, hi(dst), hi(dst), 31); /* dh = dh >>a 31 */ + emit(ctx, b, 20); /* PC += 20 */ + /* +16: shift < 32 */ + emit(ctx, sll, t1, hi(dst), 1); /* t1 = dl << 1 */ + emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */ + emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >>a src */ + emit(ctx, srav, hi(dst), hi(dst), src); /* dh = dh >> src */ + emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */ + break; + } + + /* +20: Done */ + clobber_reg64(ctx, dst); +} + +/* ALU mul immediate (64x32-bit) */ +static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm) +{ + u8 src = MIPS_R_T6; + u8 tmp = MIPS_R_T9; + + switch (imm) { + /* dst = dst * 1 is a no-op */ + case 1: + break; + /* dst = dst * -1 */ + case -1: + emit_neg_i64(ctx, dst); + break; + case 0: + emit_mov_r(ctx, lo(dst), MIPS_R_ZERO); + emit_mov_r(ctx, hi(dst), MIPS_R_ZERO); + break; + /* Full 64x32 multiply */ + default: + /* hi(dst) = hi(dst) * src(imm) */ + emit_mov_i(ctx, src, imm); + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, hi(dst), hi(dst), src); + } else { + emit(ctx, multu, hi(dst), src); + emit(ctx, mflo, hi(dst)); + } + + /* hi(dst) = hi(dst) - lo(dst) */ + if (imm < 0) + emit(ctx, subu, hi(dst), hi(dst), lo(dst)); + + /* tmp = lo(dst) * src(imm) >> 32 */ + /* lo(dst) = lo(dst) * src(imm) */ + if (cpu_has_mips32r6) { + emit(ctx, muhu, tmp, lo(dst), src); + emit(ctx, mulu, lo(dst), lo(dst), src); + } else { + emit(ctx, multu, lo(dst), 
src); + emit(ctx, mflo, lo(dst)); + emit(ctx, mfhi, tmp); + } + + /* hi(dst) += tmp */ + emit(ctx, addu, hi(dst), hi(dst), tmp); + clobber_reg64(ctx, dst); + break; + } +} + +/* ALU mul register (64x64-bit) */ +static void emit_mul_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[]) +{ + u8 acc = MIPS_R_T8; + u8 tmp = MIPS_R_T9; + + /* acc = hi(dst) * lo(src) */ + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, acc, hi(dst), lo(src)); + } else { + emit(ctx, multu, hi(dst), lo(src)); + emit(ctx, mflo, acc); + } + + /* tmp = lo(dst) * hi(src) */ + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, tmp, lo(dst), hi(src)); + } else { + emit(ctx, multu, lo(dst), hi(src)); + emit(ctx, mflo, tmp); + } + + /* acc += tmp */ + emit(ctx, addu, acc, acc, tmp); + + /* tmp = lo(dst) * lo(src) >> 32 */ + /* lo(dst) = lo(dst) * lo(src) */ + if (cpu_has_mips32r6) { + emit(ctx, muhu, tmp, lo(dst), lo(src)); + emit(ctx, mulu, lo(dst), lo(dst), lo(src)); + } else { + emit(ctx, multu, lo(dst), lo(src)); + emit(ctx, mflo, lo(dst)); + emit(ctx, mfhi, tmp); + } + + /* hi(dst) = acc + tmp */ + emit(ctx, addu, hi(dst), acc, tmp); + clobber_reg64(ctx, dst); +} + +/* Helper function for 64-bit modulo */ +static u64 jit_mod64(u64 a, u64 b) +{ + u64 rem; + + div64_u64_rem(a, b, &rem); + return rem; +} + +/* ALU div/mod register (64-bit) */ +static void emit_divmod_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[], u8 op) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */ + const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */ + const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */ + int exclude, k; + u32 addr = 0; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + 0, JIT_RESERVED_STACK); + + /* Put 64-bit arguments 1 and 2 in registers a0-a3 */ + for (k = 0; k < 2; k++) { + emit(ctx, move, MIPS_R_T9, src[k]); + emit(ctx, move, r1[k], dst[k]); + emit(ctx, move, r2[k], MIPS_R_T9); + } + + /* Emit function call */ + switch (BPF_OP(op)) { + /* dst = dst / src */ + case BPF_DIV: + addr = (u32)&div64_u64; + break; + /* dst = dst % src */ + case BPF_MOD: + addr = (u32)&jit_mod64; + break; + } + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Store the 64-bit result in dst */ + emit(ctx, move, dst[0], r0[0]); + emit(ctx, move, dst[1], r0[1]); + + /* Restore caller-saved registers, excluding the computed result */ + exclude = BIT(lo(dst)) | BIT(hi(dst)); + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + exclude, JIT_RESERVED_STACK); + emit_load_delay(ctx); + + clobber_reg64(ctx, dst); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Swap bytes in a register word */ +static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */ + emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */ + emit(ctx, srl, dst, src, 8); /* dst = src >> 8 */ + emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ +} + +/* Swap half words in a register word */ +static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, sll, tmp, src, 16); /* tmp = src << 16 */ + emit(ctx, srl, dst, src, 16); /* dst = src >> 16 */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ +} + +/* Swap bytes and truncate a register 
double word, word or half word */ +static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width) +{ + u8 tmp = MIPS_R_T8; + + switch (width) { + /* Swap bytes in a double word */ + case 64: + if (cpu_has_mips32r2 || cpu_has_mips32r6) { + emit(ctx, rotr, tmp, hi(dst), 16); + emit(ctx, rotr, hi(dst), lo(dst), 16); + emit(ctx, wsbh, lo(dst), tmp); + emit(ctx, wsbh, hi(dst), hi(dst)); + } else { + emit_swap16_r(ctx, tmp, lo(dst)); + emit_swap16_r(ctx, lo(dst), hi(dst)); + emit(ctx, move, hi(dst), tmp); + + emit(ctx, lui, tmp, 0xff); /* tmp = 0x00ff0000 */ + emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */ + emit_swap8_r(ctx, lo(dst), lo(dst), tmp); + emit_swap8_r(ctx, hi(dst), hi(dst), tmp); + } + break; + /* Swap bytes in a word */ + /* Swap bytes in a half word */ + case 32: + case 16: + emit_bswap_r(ctx, lo(dst), width); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + } + clobber_reg64(ctx, dst); +} + +/* Truncate a register double word, word or half word */ +static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width) +{ + switch (width) { + case 64: + break; + /* Zero-extend a word */ + case 32: + emit(ctx, move, hi(dst), MIPS_R_ZERO); + clobber_reg(ctx, hi(dst)); + break; + /* Zero-extend a half word */ + case 16: + emit(ctx, move, hi(dst), MIPS_R_ZERO); + emit(ctx, andi, lo(dst), lo(dst), 0xffff); + clobber_reg64(ctx, dst); + break; + } +} + +/* Load operation: dst = *(size*)(src + off) */ +static void emit_ldx(struct jit_context *ctx, + const u8 dst[], u8 src, s16 off, u8 size) +{ + switch (size) { + /* Load a byte */ + case BPF_B: + emit(ctx, lbu, lo(dst), off, src); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* Load a half word */ + case BPF_H: + emit(ctx, lhu, lo(dst), off, src); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* Load a word */ + case BPF_W: + emit(ctx, lw, lo(dst), off, src); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* Load a double word */ + case BPF_DW: + if (dst[1] == src) { + emit(ctx, lw, dst[0], off + 4, src); + emit(ctx, lw, dst[1], off, src); + } else { + emit(ctx, lw, dst[1], off, src); + emit(ctx, lw, dst[0], off + 4, src); + } + emit_load_delay(ctx); + break; + } + clobber_reg64(ctx, dst); +} + +/* Store operation: *(size *)(dst + off) = src */ +static void emit_stx(struct jit_context *ctx, + const u8 dst, const u8 src[], s16 off, u8 size) +{ + switch (size) { + /* Store a byte */ + case BPF_B: + emit(ctx, sb, lo(src), off, dst); + break; + /* Store a half word */ + case BPF_H: + emit(ctx, sh, lo(src), off, dst); + break; + /* Store a word */ + case BPF_W: + emit(ctx, sw, lo(src), off, dst); + break; + /* Store a double word */ + case BPF_DW: + emit(ctx, sw, src[1], off, dst); + emit(ctx, sw, src[0], off + 4, dst); + break; + } +} + +/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */ +static void emit_atomic_r32(struct jit_context *ctx, + u8 dst, u8 src, s16 off, u8 code) +{ + u32 exclude = 0; + u32 addr = 0; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + 0, JIT_RESERVED_STACK); + /* + * Argument 1: dst+off if xchg, otherwise src, passed in register a0 + * Argument 2: src if xchg, othersize dst+off, passed in register a1 + */ + emit(ctx, move, MIPS_R_T9, dst); + if (code == BPF_XCHG) { + emit(ctx, move, MIPS_R_A1, src); + emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off); + } else { + emit(ctx, move, MIPS_R_A0, src); + emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off); + } + + /* Emit function call */ + switch (code) { + case BPF_ADD: + 
addr = (u32)&atomic_add; + break; + case BPF_ADD | BPF_FETCH: + addr = (u32)&atomic_fetch_add; + break; + case BPF_SUB: + addr = (u32)&atomic_sub; + break; + case BPF_SUB | BPF_FETCH: + addr = (u32)&atomic_fetch_sub; + break; + case BPF_OR: + addr = (u32)&atomic_or; + break; + case BPF_OR | BPF_FETCH: + addr = (u32)&atomic_fetch_or; + break; + case BPF_AND: + addr = (u32)&atomic_and; + break; + case BPF_AND | BPF_FETCH: + addr = (u32)&atomic_fetch_and; + break; + case BPF_XOR: + addr = (u32)&atomic_xor; + break; + case BPF_XOR | BPF_FETCH: + addr = (u32)&atomic_fetch_xor; + break; + case BPF_XCHG: + addr = (u32)&atomic_xchg; + break; + } + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Update src register with old value, if specified */ + if (code & BPF_FETCH) { + emit(ctx, move, src, MIPS_R_V0); + exclude = BIT(src); + clobber_reg(ctx, src); + } + + /* Restore caller-saved registers, except any fetched value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + exclude, JIT_RESERVED_STACK); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Helper function for 64-bit atomic exchange */ +static s64 jit_xchg64(s64 a, atomic64_t *v) +{ + return atomic64_xchg(v, a); +} + +/* Atomic read-modify-write (64-bit) */ +static void emit_atomic_r64(struct jit_context *ctx, + u8 dst, const u8 src[], s16 off, u8 code) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */ + const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */ + u32 exclude = 0; + u32 addr = 0; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + 0, JIT_RESERVED_STACK); + /* + * Argument 1: 64-bit src, passed in registers a0-a1 + * Argument 2: 32-bit dst+off, passed in register a2 + */ + emit(ctx, move, MIPS_R_T9, dst); + emit(ctx, move, r1[0], src[0]); + emit(ctx, move, r1[1], src[1]); + emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off); + + /* Emit function call */ + switch (code) { + case BPF_ADD: + addr = (u32)&atomic64_add; + break; + case BPF_ADD | BPF_FETCH: + addr = (u32)&atomic64_fetch_add; + break; + case BPF_SUB: + addr = (u32)&atomic64_sub; + break; + case BPF_SUB | BPF_FETCH: + addr = (u32)&atomic64_fetch_sub; + break; + case BPF_OR: + addr = (u32)&atomic64_or; + break; + case BPF_OR | BPF_FETCH: + addr = (u32)&atomic64_fetch_or; + break; + case BPF_AND: + addr = (u32)&atomic64_and; + break; + case BPF_AND | BPF_FETCH: + addr = (u32)&atomic64_fetch_and; + break; + case BPF_XOR: + addr = (u32)&atomic64_xor; + break; + case BPF_XOR | BPF_FETCH: + addr = (u32)&atomic64_fetch_xor; + break; + case BPF_XCHG: + addr = (u32)&jit_xchg64; + break; + } + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Update src register with old value, if specified */ + if (code & BPF_FETCH) { + emit(ctx, move, lo(src), lo(r0)); + emit(ctx, move, hi(src), hi(r0)); + exclude = BIT(src[0]) | BIT(src[1]); + clobber_reg64(ctx, src); + } + + /* Restore caller-saved registers, except any fetched value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + exclude, JIT_RESERVED_STACK); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */ +static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, 
JIT_RESERVED_STACK + 2 * sizeof(u32)); + /* + * Argument 1: 32-bit dst+off, passed in register a0 + * Argument 2: 32-bit r0, passed in register a1 + * Argument 3: 32-bit src, passed in register a2 + */ + emit(ctx, addiu, MIPS_R_T9, dst, off); + emit(ctx, move, MIPS_R_T8, src); + emit(ctx, move, MIPS_R_A1, lo(r0)); + emit(ctx, move, MIPS_R_A0, MIPS_R_T9); + emit(ctx, move, MIPS_R_A2, MIPS_R_T8); + + /* Emit function call */ + emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + +#ifdef __BIG_ENDIAN + emit(ctx, move, lo(r0), MIPS_R_V0); +#endif + /* Restore caller-saved registers, except the return value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Atomic compare-and-exchange (64-bit) */ +static void emit_cmpxchg_r64(struct jit_context *ctx, + u8 dst, const u8 src[], s16 off) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; + const u8 *r2 = bpf2mips32[BPF_REG_2]; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + /* + * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused) + * Argument 2: 64-bit r0, passed in registers a2-a3 + * Argument 3: 64-bit src, passed on stack + */ + push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK); + emit(ctx, addiu, MIPS_R_T9, dst, off); + emit(ctx, move, r2[0], r0[0]); + emit(ctx, move, r2[1], r0[1]); + emit(ctx, move, MIPS_R_A0, MIPS_R_T9); + + /* Emit function call */ + emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Restore caller-saved registers, except the return value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + clobber_reg(ctx, MIPS_R_RA); +} + +/* + * Conditional movz or an emulated equivalent. + * Note that the rs register may be modified. + */ +static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt) +{ + if (cpu_has_mips_2) { + emit(ctx, movz, rd, rs, rt); /* rd = rt ? rd : rs */ + } else if (cpu_has_mips32r6) { + if (rs != MIPS_R_ZERO) + emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt == 0 */ + emit(ctx, selnez, rd, rd, rt); /* rd = 0 if rt != 0 */ + if (rs != MIPS_R_ZERO) + emit(ctx, or, rd, rd, rs); /* rd = rd | rs */ + } else { + emit(ctx, bnez, rt, 8); /* PC += 8 if rd != 0 */ + emit(ctx, nop); /* +0: delay slot */ + emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */ + } + clobber_reg(ctx, rd); + clobber_reg(ctx, rs); +} + +/* + * Conditional movn or an emulated equivalent. + * Note that the rs register may be modified. + */ +static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt) +{ + if (cpu_has_mips_2) { + emit(ctx, movn, rd, rs, rt); /* rd = rt ? 
rs : rd */ + } else if (cpu_has_mips32r6) { + if (rs != MIPS_R_ZERO) + emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0 */ + emit(ctx, seleqz, rd, rd, rt); /* rd = 0 if rt != 0 */ + if (rs != MIPS_R_ZERO) + emit(ctx, or, rd, rd, rs); /* rd = rd | rs */ + } else { + emit(ctx, beqz, rt, 8); /* PC += 8 if rd == 0 */ + emit(ctx, nop); /* +0: delay slot */ + emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */ + } + clobber_reg(ctx, rd); + clobber_reg(ctx, rs); +} + +/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */ +static void emit_sltiu_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], s64 imm) +{ + u8 tmp = MIPS_R_T9; + + if (imm < 0) { + emit_mov_i(ctx, rd, imm); /* rd = imm */ + emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */ + emit(ctx, sltiu, tmp, hi(rs), -1); /* tmp = rsh < ~0U */ + emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */ + } else { /* imm >= 0 */ + if (imm > 0x7fff) { + emit_mov_i(ctx, rd, (s32)imm); /* rd = imm */ + emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */ + } else { + emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */ + } + emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh */ + } +} + +/* Emulation of 64-bit sltu rd, rs, rt */ +static void emit_sltu_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], const u8 rt[]) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, sltu, rd, lo(rs), lo(rt)); /* rd = rsl < rtl */ + emit(ctx, subu, tmp, hi(rs), hi(rt)); /* tmp = rsh - rth */ + emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp); /* rd = 0 if tmp != 0 */ + emit(ctx, sltu, tmp, hi(rs), hi(rt)); /* tmp = rsh < rth */ + emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */ +} + +/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */ +static void emit_slti_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], s64 imm) +{ + u8 t1 = MIPS_R_T8; + u8 t2 = MIPS_R_T9; + u8 cmp; + + /* + * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl + * else t1 = rsl > 31 */ + if (imm < 0) + emit_movz_r(ctx, t1, t2, rd); /* t1 = rd ? t1 : t2 */ + else + emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */ + /* + * if ((imm < 0 && rsh != 0xffffffff) || + * (imm >= 0 && rsh != 0)) + * t1 = 0 + */ + if (imm < 0) { + emit(ctx, addiu, rd, hi(rs), 1); /* rd = rsh + 1 */ + cmp = rd; + } else { /* imm >= 0 */ + cmp = hi(rs); + } + emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp); /* t1 = 0 if cmp != 0 */ + + /* + * if (imm < 0) rd = rsh < -1 + * else rd = rsh != 0 + * rd = rd | t1 + */ + emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */ + emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */ +} + +/* Emulation of 64-bit(slt rd, rs, rt) */ +static void emit_slt_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], const u8 rt[]) +{ + u8 t1 = MIPS_R_T7; + u8 t2 = MIPS_R_T8; + u8 t3 = MIPS_R_T9; + + /* + * if ((rs < 0) ^ (rt < 0)) t1 = rtl > 31 */ + emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? 
t2 : t1 */ + emit_movn_r(ctx, t1, MIPS_R_ZERO, t3); /* t1 = 0 if t3 != 0 */ + + /* rd = (rsh < rth) | t1 */ + emit(ctx, slt, rd, hi(rs), hi(rt)); /* rd = rsh = -0x7fff && imm <= 0x8000) { + emit(ctx, addiu, tmp, lo(dst), -imm); + } else if ((u32)imm <= 0xffff) { + emit(ctx, xori, tmp, lo(dst), imm); + } else { /* Register fallback */ + emit_mov_i(ctx, tmp, imm); + emit(ctx, xor, tmp, lo(dst), tmp); + } + if (imm < 0) { /* Compare sign extension */ + emit(ctx, addu, MIPS_R_T9, hi(dst), 1); + emit(ctx, or, tmp, tmp, MIPS_R_T9); + } else { /* Compare zero extension */ + emit(ctx, or, tmp, tmp, hi(dst)); + } + if (op == BPF_JEQ) + emit(ctx, beqz, tmp, off); + else /* BPF_JNE */ + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst & imm */ + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case BPF_JSET: + case JIT_JNSET: + if ((u32)imm <= 0xffff) { + emit(ctx, andi, tmp, lo(dst), imm); + } else { /* Register fallback */ + emit_mov_i(ctx, tmp, imm); + emit(ctx, and, tmp, lo(dst), tmp); + } + if (imm < 0) /* Sign-extension pulls in high word */ + emit(ctx, or, tmp, tmp, hi(dst)); + if (op == BPF_JSET) + emit(ctx, bnez, tmp, off); + else /* JIT_JNSET */ + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst > imm */ + case BPF_JGT: + emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst >= imm */ + case BPF_JGE: + emit_sltiu_r64(ctx, tmp, dst, imm); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst < imm */ + case BPF_JLT: + emit_sltiu_r64(ctx, tmp, dst, imm); + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst <= imm */ + case BPF_JLE: + emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst > imm (signed) */ + case BPF_JSGT: + emit_slti_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst >= imm (signed) */ + case BPF_JSGE: + emit_slti_r64(ctx, tmp, dst, imm); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst < imm (signed) */ + case BPF_JSLT: + emit_slti_r64(ctx, tmp, dst, imm); + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst <= imm (signed) */ + case BPF_JSLE: + emit_slti_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, bnez, tmp, off); + break; + } +} + +/* Jump register (64-bit) */ +static void emit_jmp_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[], s32 off, u8 op) +{ + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + switch (op) { + /* No-op, used internally for branch optimization */ + case JIT_JNOP: + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + case BPF_JEQ: + case BPF_JNE: + emit(ctx, subu, t1, lo(dst), lo(src)); + emit(ctx, subu, t2, hi(dst), hi(src)); + emit(ctx, or, t1, t1, t2); + if (op == BPF_JEQ) + emit(ctx, beqz, t1, off); + else /* BPF_JNE */ + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst & src */ + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case BPF_JSET: + case JIT_JNSET: + emit(ctx, and, t1, lo(dst), lo(src)); + emit(ctx, and, t2, hi(dst), hi(src)); + emit(ctx, or, t1, t1, t2); + if (op == BPF_JSET) + emit(ctx, bnez, t1, off); + else /* JIT_JNSET */ + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst > src */ + case BPF_JGT: + emit_sltu_r64(ctx, t1, src, dst); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst >= src */ + case BPF_JGE: + emit_sltu_r64(ctx, t1, dst, src); + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst < src */ + case BPF_JLT: + 
emit_sltu_r64(ctx, t1, dst, src); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst <= src */ + case BPF_JLE: + emit_sltu_r64(ctx, t1, src, dst); + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst > src (signed) */ + case BPF_JSGT: + emit_slt_r64(ctx, t1, src, dst); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst >= src (signed) */ + case BPF_JSGE: + emit_slt_r64(ctx, t1, dst, src); + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst < src (signed) */ + case BPF_JSLT: + emit_slt_r64(ctx, t1, dst, src); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst <= src (signed) */ + case BPF_JSLE: + emit_slt_r64(ctx, t1, src, dst); + emit(ctx, beqz, t1, off); + break; + } +} + +/* Function call */ +static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) +{ + bool fixed; + u64 addr; + + /* Decode the call address */ + if (bpf_jit_get_func_addr(ctx->program, insn, false, + &addr, &fixed) < 0) + return -1; + if (!fixed) + return -1; + + /* Push stack arguments */ + push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK); + + /* Emit function call */ + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + clobber_reg(ctx, MIPS_R_RA); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + return 0; +} + +/* Function tail call */ +static int emit_tail_call(struct jit_context *ctx) +{ + u8 ary = lo(bpf2mips32[BPF_REG_2]); + u8 ind = lo(bpf2mips32[BPF_REG_3]); + u8 t1 = MIPS_R_T8; + u8 t2 = MIPS_R_T9; + int off; + + /* + * Tail call: + * eBPF R1 - function argument (context ptr), passed in a0-a1 + * eBPF R2 - ptr to object with array of function entry points + * eBPF R3 - array index of function to be called + * stack[sz] - remaining tail call count, initialized in prologue + */ + + /* if (ind >= ary->map.max_entries) goto out */ + off = offsetof(struct bpf_array, map.max_entries); + if (off > 0x7fff) + return -1; + emit(ctx, lw, t1, off, ary); /* t1 = ary->map.max_entries*/ + emit_load_delay(ctx); /* Load delay slot */ + emit(ctx, sltu, t1, ind, t1); /* t1 = ind < t1 */ + emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0 */ + /* (next insn delay slot) */ + /* if (TCC-- <= 0) goto out */ + emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */ + emit_load_delay(ctx); /* Load delay slot */ + emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 < 0 */ + emit(ctx, addiu, t2, t2, -1); /* t2-- (delay slot) */ + emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */ + + /* prog = ary->ptrs[ind] */ + off = offsetof(struct bpf_array, ptrs); + if (off > 0x7fff) + return -1; + emit(ctx, sll, t1, ind, 2); /* t1 = ind << 2 */ + emit(ctx, addu, t1, t1, ary); /* t1 += ary */ + emit(ctx, lw, t2, off, t1); /* t2 = *(t1 + off) */ + emit_load_delay(ctx); /* Load delay slot */ + + /* if (prog == 0) goto out */ + emit(ctx, beqz, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 == 0 */ + emit(ctx, nop); /* Delay slot */ + + /* func = prog->bpf_func + 8 (prologue skip offset) */ + off = offsetof(struct bpf_prog, bpf_func); + if (off > 0x7fff) + return -1; + emit(ctx, lw, t1, off, t2); /* t1 = *(t2 + off) */ + emit_load_delay(ctx); /* Load delay slot */ + emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP); /* t1 += skip (8 or 12) */ + + /* goto func */ + build_epilogue(ctx, t1); + return 0; +} + +/* + * Stack frame layout for a JITed program (stack grows down). 
+ * + * Higher address : Caller's stack frame : + * :----------------------------: + * : 64-bit eBPF args r3-r5 : + * :----------------------------: + * : Reserved / tail call count : + * +============================+ <--- MIPS sp before call + * | Callee-saved registers, | + * | including RA and FP | + * +----------------------------+ <--- eBPF FP (MIPS zero,fp) + * | Local eBPF variables | + * | allocated by program | + * +----------------------------+ + * | Reserved for caller-saved | + * | registers | + * +----------------------------+ + * | Reserved for 64-bit eBPF | + * | args r3-r5 & args passed | + * | on stack in kernel calls | + * Lower address +============================+ <--- MIPS sp + */ + +/* Build program prologue to set up the stack and registers */ +void build_prologue(struct jit_context *ctx) +{ + const u8 *r1 = bpf2mips32[BPF_REG_1]; + const u8 *fp = bpf2mips32[BPF_REG_FP]; + int stack, saved, locals, reserved; + + /* + * The first two instructions initialize TCC in the reserved (for us) + * 16-byte area in the parent's stack frame. On a tail call, the + * calling function jumps into the prologue after these instructions. + */ + emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, + min(MAX_TAIL_CALL_CNT + 1, 0xffff)); + emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP); + + /* + * Register eBPF R1 contains the 32-bit context pointer argument. + * A 32-bit argument is always passed in MIPS register a0, regardless + * of CPU endianness. Initialize R1 accordingly and zero-extend. + */ +#ifdef __BIG_ENDIAN + emit(ctx, move, lo(r1), MIPS_R_A0); +#endif + + /* === Entry-point for tail calls === */ + + /* Zero-extend the 32-bit argument */ + emit(ctx, move, hi(r1), MIPS_R_ZERO); + + /* If the eBPF frame pointer was accessed it must be saved */ + if (ctx->accessed & BIT(BPF_REG_FP)) + clobber_reg64(ctx, fp); + + /* Compute the stack space needed for callee-saved registers */ + saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32); + saved = ALIGN(saved, MIPS_STACK_ALIGNMENT); + + /* Stack space used by eBPF program local data */ + locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT); + + /* + * If we are emitting function calls, reserve extra stack space for + * caller-saved registers and function arguments passed on the stack. + * The required space is computed automatically during resource + * usage discovery (pass 1). + */ + reserved = ctx->stack_used; + + /* Allocate the stack frame */ + stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT); + emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack); + + /* Store callee-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved); + + /* Initialize the eBPF frame pointer if accessed */ + if (ctx->accessed & BIT(BPF_REG_FP)) + emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved); + + ctx->saved_size = saved; + ctx->stack_size = stack; +} + +/* Build the program epilogue to restore the stack and registers */ +void build_epilogue(struct jit_context *ctx, int dest_reg) +{ + /* Restore callee-saved registers from stack */ + pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, + ctx->stack_size - ctx->saved_size); + /* + * A 32-bit return value is always passed in MIPS register v0, + * but on big-endian targets the low part of R0 is mapped to v1. 
+ */ +#ifdef __BIG_ENDIAN + emit(ctx, move, MIPS_R_V0, MIPS_R_V1); +#endif + + /* Jump to the return address and adjust the stack pointer */ + emit(ctx, jr, dest_reg); + emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size); +} + +/* Build one eBPF instruction */ +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx) +{ + const u8 *dst = bpf2mips32[insn->dst_reg]; + const u8 *src = bpf2mips32[insn->src_reg]; + const u8 *res = bpf2mips32[BPF_REG_0]; + const u8 *tmp = bpf2mips32[JIT_REG_TMP]; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + s32 val, rel; + u8 alu, jmp; + + switch (code) { + /* ALU operations */ + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + emit_mov_i(ctx, lo(dst), imm); + emit_zext_ver(ctx, dst); + break; + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_mov_i(ctx, hi(dst), 0); + } else { + emit_mov_r(ctx, lo(dst), lo(src)); + emit_zext_ver(ctx, dst); + } + break; + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + emit_alu_i(ctx, lo(dst), 0, BPF_NEG); + emit_zext_ver(ctx, dst); + break; + /* dst = dst & imm */ + /* dst = dst | imm */ + /* dst = dst ^ imm */ + /* dst = dst << imm */ + /* dst = dst >> imm */ + /* dst = dst >> imm (arithmetic) */ + /* dst = dst + imm */ + /* dst = dst - imm */ + /* dst = dst * imm */ + /* dst = dst / imm */ + /* dst = dst % imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_mov_i(ctx, MIPS_R_T6, imm); + emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_alu_i(ctx, lo(dst), val, alu); + } + emit_zext_ver(ctx, dst); + break; + /* dst = dst & src */ + /* dst = dst | src */ + /* dst = dst ^ src */ + /* dst = dst << src */ + /* dst = dst >> src */ + /* dst = dst >> src (arithmetic) */ + /* dst = dst + src */ + /* dst = dst - src */ + /* dst = dst * src */ + /* dst = dst / src */ + /* dst = dst % src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: + emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code)); + emit_zext_ver(ctx, dst); + break; + /* dst = imm (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_mov_se_i64(ctx, dst, imm); + break; + /* dst = src (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_X: + emit_mov_r(ctx, lo(dst), lo(src)); + emit_mov_r(ctx, hi(dst), hi(src)); + break; + /* dst = -dst (64-bit) */ + case BPF_ALU64 | BPF_NEG: + emit_neg_i64(ctx, dst); + break; + /* dst = dst & imm (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_K: + emit_alu_i64(ctx, dst, imm, BPF_OP(code)); + break; + /* dst = dst | imm (64-bit) */ + /* dst = dst ^ imm (64-bit) */ + /* dst = dst + imm (64-bit) */ + /* dst = dst - imm (64-bit) */ + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (imm) + emit_alu_i64(ctx, 
dst, imm, BPF_OP(code)); + break; + /* dst = dst << imm (64-bit) */ + /* dst = dst >> imm (64-bit) */ + /* dst = dst >> imm (64-bit, arithmetic) */ + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (imm) + emit_shift_i64(ctx, dst, imm, BPF_OP(code)); + break; + /* dst = dst * imm (64-bit) */ + case BPF_ALU64 | BPF_MUL | BPF_K: + emit_mul_i64(ctx, dst, imm); + break; + /* dst = dst / imm (64-bit) */ + /* dst = dst % imm (64-bit) */ + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + /* + * Sign-extend the immediate value into a temporary register, + * and then do the operation on this register. + */ + emit_mov_se_i64(ctx, tmp, imm); + emit_divmod_r64(ctx, dst, tmp, BPF_OP(code)); + break; + /* dst = dst & src (64-bit) */ + /* dst = dst | src (64-bit) */ + /* dst = dst ^ src (64-bit) */ + /* dst = dst + src (64-bit) */ + /* dst = dst - src (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_alu_r64(ctx, dst, src, BPF_OP(code)); + break; + /* dst = dst << src (64-bit) */ + /* dst = dst >> src (64-bit) */ + /* dst = dst >> src (64-bit, arithmetic) */ + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_shift_r64(ctx, dst, lo(src), BPF_OP(code)); + break; + /* dst = dst * src (64-bit) */ + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_mul_r64(ctx, dst, src); + break; + /* dst = dst / src (64-bit) */ + /* dst = dst % src (64-bit) */ + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_divmod_r64(ctx, dst, src, BPF_OP(code)); + break; + /* dst = htole(dst) */ + /* dst = htobe(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU | BPF_END | BPF_FROM_BE: + if (BPF_SRC(code) == +#ifdef __BIG_ENDIAN + BPF_FROM_LE +#else + BPF_FROM_BE +#endif + ) + emit_bswap_r64(ctx, dst, imm); + else + emit_trunc_r64(ctx, dst, imm); + break; + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + emit_mov_i(ctx, lo(dst), imm); + emit_mov_i(ctx, hi(dst), insn[1].imm); + return 1; + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_DW: + emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code)); + break; + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_DW: + /* Sign-extend immediate value into temporary reg */ + emit_mov_se_i64(ctx, tmp, imm); + break; + case BPF_W: + case BPF_H: + case BPF_B: + emit_mov_i(ctx, lo(tmp), imm); + break; + } + emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code)); + break; + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_DW: + emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code)); + break; + /* Speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* Atomics */ + case BPF_STX | BPF_ATOMIC | BPF_W: + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + if (cpu_has_llsc) + emit_atomic_r(ctx, lo(dst), lo(src), off, imm); + else /* 
Non-ll/sc fallback */ + emit_atomic_r32(ctx, lo(dst), lo(src), + off, imm); + if (imm & BPF_FETCH) + emit_zext_ver(ctx, src); + break; + case BPF_CMPXCHG: + if (cpu_has_llsc) + emit_cmpxchg_r(ctx, lo(dst), lo(src), + lo(res), off); + else /* Non-ll/sc fallback */ + emit_cmpxchg_r32(ctx, lo(dst), lo(src), off); + /* Result zero-extension inserted by verifier */ + break; + default: + goto notyet; + } + break; + /* Atomics (64-bit) */ + case BPF_STX | BPF_ATOMIC | BPF_DW: + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + emit_atomic_r64(ctx, lo(dst), src, off, imm); + break; + case BPF_CMPXCHG: + emit_cmpxchg_r64(ctx, lo(dst), src, off); + break; + default: + goto notyet; + } + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel); + if (valid_jmp_i(jmp, imm)) { + emit_jmp_i(ctx, lo(dst), imm, rel, jmp); + } else { + /* Move large immediate to register */ + emit_mov_i(ctx, MIPS_R_T6, imm); + emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp); + } + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT 
| BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_jmp_r64(ctx, dst, src, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel); + emit_jmp_i64(ctx, dst, imm, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off */ + case BPF_JMP | BPF_JA: + if (off == 0) + break; + if (emit_ja(ctx, off) < 0) + goto toofar; + break; + /* Tail call */ + case BPF_JMP | BPF_TAIL_CALL: + if (emit_tail_call(ctx) < 0) + goto invalid; + break; + /* Function call */ + case BPF_JMP | BPF_CALL: + if (emit_call(ctx, insn) < 0) + goto invalid; + break; + /* Function return */ + case BPF_JMP | BPF_EXIT: + /* + * Optimization: when last instruction is EXIT + * simply continue to epilogue. + */ + if (ctx->bpf_index == ctx->program->len - 1) + break; + if (emit_exit(ctx) < 0) + goto toofar; + break; + + default: +invalid: + pr_err_once("unknown opcode %02x\n", code); + return -EINVAL; +notyet: + pr_info_once("*** NOT YET: opcode %02x ***\n", code); + return -EFAULT; +toofar: + pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n", + ctx->bpf_index, code); + return -E2BIG; + } + return 0; +} From fbc802de6b10669bfe2d4ebc4dcf12563bba117c Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 5 Oct 2021 18:54:05 +0200 Subject: [PATCH 018/181] mips, bpf: Add new eBPF JIT for 64-bit MIPS This is an implementation on of an eBPF JIT for 64-bit MIPS III-V and MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211005165408.2305108-5-johan.almbladh@anyfinetworks.com --- arch/mips/net/bpf_jit_comp64.c | 1052 ++++++++++++++++++++++++++++++++ 1 file changed, 1052 insertions(+) create mode 100644 arch/mips/net/bpf_jit_comp64.c diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c new file mode 100644 index 0000000000000..ca49d3ef7ff49 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp64.c @@ -0,0 +1,1052 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Just-In-Time compiler for eBPF bytecode on MIPS. + * Implementation of JIT functions for 64-bit CPUs. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. 
+ * Copyright (c) 2017 Shubham Bansal + * Copyright (c) 2011 Mircea Gherzan + */ + +#include +#include +#include +#include +#include +#include + +#include "bpf_jit_comp.h" + +/* MIPS t0-t3 are not available in the n64 ABI */ +#undef MIPS_R_T0 +#undef MIPS_R_T1 +#undef MIPS_R_T2 +#undef MIPS_R_T3 + +/* Stack is 16-byte aligned in n64 ABI */ +#define MIPS_STACK_ALIGNMENT 16 + +/* Extra 64-bit eBPF registers used by JIT */ +#define JIT_REG_TC (MAX_BPF_JIT_REG + 0) +#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1) + +/* Number of prologue bytes to skip when doing a tail call */ +#define JIT_TCALL_SKIP 4 + +/* Callee-saved CPU registers that the JIT must preserve */ +#define JIT_CALLEE_REGS \ + (BIT(MIPS_R_S0) | \ + BIT(MIPS_R_S1) | \ + BIT(MIPS_R_S2) | \ + BIT(MIPS_R_S3) | \ + BIT(MIPS_R_S4) | \ + BIT(MIPS_R_S5) | \ + BIT(MIPS_R_S6) | \ + BIT(MIPS_R_S7) | \ + BIT(MIPS_R_GP) | \ + BIT(MIPS_R_FP) | \ + BIT(MIPS_R_RA)) + +/* Caller-saved CPU registers available for JIT use */ +#define JIT_CALLER_REGS \ + (BIT(MIPS_R_A5) | \ + BIT(MIPS_R_A6) | \ + BIT(MIPS_R_A7)) +/* + * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers. + * MIPS registers t4 - t7 may be used by the JIT as temporary registers. + * MIPS registers t8 - t9 are reserved for single-register common functions. + */ +static const u8 bpf2mips64[] = { + /* Return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = MIPS_R_V0, + /* Arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = MIPS_R_A0, + [BPF_REG_2] = MIPS_R_A1, + [BPF_REG_3] = MIPS_R_A2, + [BPF_REG_4] = MIPS_R_A3, + [BPF_REG_5] = MIPS_R_A4, + /* Callee-saved registers that in-kernel function will preserve */ + [BPF_REG_6] = MIPS_R_S0, + [BPF_REG_7] = MIPS_R_S1, + [BPF_REG_8] = MIPS_R_S2, + [BPF_REG_9] = MIPS_R_S3, + /* Read-only frame pointer to access the eBPF stack */ + [BPF_REG_FP] = MIPS_R_FP, + /* Temporary register for blinding constants */ + [BPF_REG_AX] = MIPS_R_AT, + /* Tail call count register, caller-saved */ + [JIT_REG_TC] = MIPS_R_A5, + /* Constant for register zero-extension */ + [JIT_REG_ZX] = MIPS_R_V1, +}; + +/* + * MIPS 32-bit operations on 64-bit registers generate a sign-extended + * result. However, the eBPF ISA mandates zero-extension, so we rely on the + * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic + * operations, right shift and byte swap require properly sign-extended + * operands or the result is unpredictable. We emit explicit sign-extensions + * in those cases. 
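+ *
+ * For illustration (values assumed): a 32-bit addu of 0x80000000 and 0
+ * leaves 0xffffffff80000000 in the 64-bit register, while the eBPF ALU32
+ * semantics require 0x0000000080000000; emit_zext_ver() clears the upper
+ * word unless the verifier has already added an explicit zero-extension.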
+ */ + +/* Sign extension */ +static void emit_sext(struct jit_context *ctx, u8 dst, u8 src) +{ + emit(ctx, sll, dst, src, 0); + clobber_reg(ctx, dst); +} + +/* Zero extension */ +static void emit_zext(struct jit_context *ctx, u8 dst) +{ + if (cpu_has_mips64r2 || cpu_has_mips64r6) { + emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + } else { + emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]); + access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */ + } + clobber_reg(ctx, dst); +} + +/* Zero extension, if verifier does not do it for us */ +static void emit_zext_ver(struct jit_context *ctx, u8 dst) +{ + if (!ctx->program->aux->verifier_zext) + emit_zext(ctx, dst); +} + +/* dst = imm (64-bit) */ +static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64) +{ + if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) { + emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64); + } else if (imm64 >= 0xffffffff80000000ULL || + (imm64 < 0x80000000 && imm64 > 0xffff)) { + emit(ctx, lui, dst, (s16)(imm64 >> 16)); + emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff); + } else { + u8 acc = MIPS_R_ZERO; + int k; + + for (k = 0; k < 4; k++) { + u16 half = imm64 >> (48 - 16 * k); + + if (acc == dst) + emit(ctx, dsll, dst, dst, 16); + + if (half) { + emit(ctx, ori, dst, acc, half); + acc = dst; + } + } + } + clobber_reg(ctx, dst); +} + +/* ALU immediate operation (64-bit) */ +static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst | imm */ + case BPF_OR: + emit(ctx, ori, dst, dst, (u16)imm); + break; + /* dst = dst ^ imm */ + case BPF_XOR: + emit(ctx, xori, dst, dst, (u16)imm); + break; + /* dst = -dst */ + case BPF_NEG: + emit(ctx, dsubu, dst, MIPS_R_ZERO, dst); + break; + /* dst = dst << imm */ + case BPF_LSH: + emit(ctx, dsll_safe, dst, dst, imm); + break; + /* dst = dst >> imm */ + case BPF_RSH: + emit(ctx, dsrl_safe, dst, dst, imm); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ARSH: + emit(ctx, dsra_safe, dst, dst, imm); + break; + /* dst = dst + imm */ + case BPF_ADD: + emit(ctx, daddiu, dst, dst, imm); + break; + /* dst = dst - imm */ + case BPF_SUB: + emit(ctx, daddiu, dst, dst, -imm); + break; + default: + /* Width-generic operations */ + emit_alu_i(ctx, dst, imm, op); + } + clobber_reg(ctx, dst); +} + +/* ALU register operation (64-bit) */ +static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst << src */ + case BPF_LSH: + emit(ctx, dsllv, dst, dst, src); + break; + /* dst = dst >> src */ + case BPF_RSH: + emit(ctx, dsrlv, dst, dst, src); + break; + /* dst = dst >> src (arithmetic) */ + case BPF_ARSH: + emit(ctx, dsrav, dst, dst, src); + break; + /* dst = dst + src */ + case BPF_ADD: + emit(ctx, daddu, dst, dst, src); + break; + /* dst = dst - src */ + case BPF_SUB: + emit(ctx, dsubu, dst, dst, src); + break; + /* dst = dst * src */ + case BPF_MUL: + if (cpu_has_mips64r6) { + emit(ctx, dmulu, dst, dst, src); + } else { + emit(ctx, dmultu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst / src */ + case BPF_DIV: + if (cpu_has_mips64r6) { + emit(ctx, ddivu_r6, dst, dst, src); + } else { + emit(ctx, ddivu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst % src */ + case BPF_MOD: + if (cpu_has_mips64r6) { + emit(ctx, dmodu, dst, dst, src); + } else { + emit(ctx, ddivu, dst, src); + emit(ctx, mfhi, dst); + } + break; + default: + /* Width-generic operations */ + emit_alu_r(ctx, dst, src, op); + } + clobber_reg(ctx, dst); +} + +/* Swap sub words 
in a register double word */ +static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, and, tmp, dst, mask); /* tmp = dst & mask */ + emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */ + emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */ + emit(ctx, and, dst, dst, mask); /* dst = dst & mask */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ +} + +/* Swap bytes and truncate a register double word, word or half word */ +static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width) +{ + switch (width) { + /* Swap bytes in a double word */ + case 64: + if (cpu_has_mips64r2 || cpu_has_mips64r6) { + emit(ctx, dsbh, dst, dst); + emit(ctx, dshd, dst, dst); + } else { + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + emit(ctx, dsll32, t2, dst, 0); /* t2 = dst << 32 */ + emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */ + emit(ctx, or, dst, dst, t2); /* dst = dst | t2 */ + + emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff); + emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */ + emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */ + emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */ + + emit(ctx, lui, t2, 0xff); /* t2 = 0x00ff0000 */ + emit(ctx, ori, t2, t2, 0xff); /* t2 = t2 | 0x00ff */ + emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */ + emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */ + emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */ + } + break; + /* Swap bytes in a half word */ + /* Swap bytes in a word */ + case 32: + case 16: + emit_sext(ctx, dst, dst); + emit_bswap_r(ctx, dst, width); + if (cpu_has_mips64r2 || cpu_has_mips64r6) + emit_zext(ctx, dst); + break; + } + clobber_reg(ctx, dst); +} + +/* Truncate a register double word, word or half word */ +static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width) +{ + switch (width) { + case 64: + break; + /* Zero-extend a word */ + case 32: + emit_zext(ctx, dst); + break; + /* Zero-extend a half word */ + case 16: + emit(ctx, andi, dst, dst, 0xffff); + break; + } + clobber_reg(ctx, dst); +} + +/* Load operation: dst = *(size*)(src + off) */ +static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size) +{ + switch (size) { + /* Load a byte */ + case BPF_B: + emit(ctx, lbu, dst, off, src); + break; + /* Load a half word */ + case BPF_H: + emit(ctx, lhu, dst, off, src); + break; + /* Load a word */ + case BPF_W: + emit(ctx, lwu, dst, off, src); + break; + /* Load a double word */ + case BPF_DW: + emit(ctx, ld, dst, off, src); + break; + } + clobber_reg(ctx, dst); +} + +/* Store operation: *(size *)(dst + off) = src */ +static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size) +{ + switch (size) { + /* Store a byte */ + case BPF_B: + emit(ctx, sb, src, off, dst); + break; + /* Store a half word */ + case BPF_H: + emit(ctx, sh, src, off, dst); + break; + /* Store a word */ + case BPF_W: + emit(ctx, sw, src, off, dst); + break; + /* Store a double word */ + case BPF_DW: + emit(ctx, sd, src, off, dst); + break; + } +} + +/* Atomic read-modify-write */ +static void emit_atomic_r64(struct jit_context *ctx, + u8 dst, u8 src, s16 off, u8 code) +{ + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + emit(ctx, lld, t1, off, dst); + switch (code) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + emit(ctx, daddu, t2, t1, src); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + emit(ctx, and, t2, t1, src); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + emit(ctx, or, t2, t1, src); + break; + case BPF_XOR: + case BPF_XOR | 
BPF_FETCH: + emit(ctx, xor, t2, t1, src); + break; + case BPF_XCHG: + emit(ctx, move, t2, src); + break; + } + emit(ctx, scd, t2, off, dst); + emit(ctx, beqz, t2, -16); + emit(ctx, nop); /* Delay slot */ + + if (code & BPF_FETCH) { + emit(ctx, move, src, t1); + clobber_reg(ctx, src); + } +} + +/* Atomic compare-and-exchange */ +static void emit_cmpxchg_r64(struct jit_context *ctx, u8 dst, u8 src, s16 off) +{ + u8 r0 = bpf2mips64[BPF_REG_0]; + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + emit(ctx, lld, t1, off, dst); + emit(ctx, bne, t1, r0, 12); + emit(ctx, move, t2, src); /* Delay slot */ + emit(ctx, scd, t2, off, dst); + emit(ctx, beqz, t2, -20); + emit(ctx, move, r0, t1); /* Delay slot */ + + clobber_reg(ctx, r0); +} + +/* Function call */ +static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) +{ + u8 zx = bpf2mips64[JIT_REG_ZX]; + u8 tmp = MIPS_R_T6; + bool fixed; + u64 addr; + + /* Decode the call address */ + if (bpf_jit_get_func_addr(ctx->program, insn, false, + &addr, &fixed) < 0) + return -1; + if (!fixed) + return -1; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); + + /* Emit function call */ + emit_mov_i64(ctx, tmp, addr); + emit(ctx, jalr, MIPS_R_RA, tmp); + emit(ctx, nop); /* Delay slot */ + + /* Restore caller-saved registers */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); + + /* Re-initialize the JIT zero-extension register if accessed */ + if (ctx->accessed & BIT(JIT_REG_ZX)) { + emit(ctx, daddiu, zx, MIPS_R_ZERO, -1); + emit(ctx, dsrl32, zx, zx, 0); + } + + clobber_reg(ctx, MIPS_R_RA); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + return 0; +} + +/* Function tail call */ +static int emit_tail_call(struct jit_context *ctx) +{ + u8 ary = bpf2mips64[BPF_REG_2]; + u8 ind = bpf2mips64[BPF_REG_3]; + u8 tcc = bpf2mips64[JIT_REG_TC]; + u8 tmp = MIPS_R_T6; + int off; + + /* + * Tail call: + * eBPF R1 - function argument (context ptr), passed in a0-a1 + * eBPF R2 - ptr to object with array of function entry points + * eBPF R3 - array index of function to be called + */ + + /* if (ind >= ary->map.max_entries) goto out */ + off = offsetof(struct bpf_array, map.max_entries); + if (off > 0x7fff) + return -1; + emit(ctx, lwu, tmp, off, ary); /* tmp = ary->map.max_entrs*/ + emit(ctx, sltu, tmp, ind, tmp); /* tmp = ind < t1 */ + emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/ + + /* if (--TCC < 0) goto out */ + emit(ctx, daddiu, tcc, tcc, -1); /* tcc-- (delay slot) */ + emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */ + /* (next insn delay slot) */ + /* prog = ary->ptrs[ind] */ + off = offsetof(struct bpf_array, ptrs); + if (off > 0x7fff) + return -1; + emit(ctx, dsll, tmp, ind, 3); /* tmp = ind << 3 */ + emit(ctx, daddu, tmp, tmp, ary); /* tmp += ary */ + emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */ + + /* if (prog == 0) goto out */ + emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/ + emit(ctx, nop); /* Delay slot */ + + /* func = prog->bpf_func + 8 (prologue skip offset) */ + off = offsetof(struct bpf_prog, bpf_func); + if (off > 0x7fff) + return -1; + emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */ + emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */ + + /* goto func */ + build_epilogue(ctx, tmp); + access_reg(ctx, JIT_REG_TC); + return 0; +} + +/* + * Stack frame layout for a JITed program (stack grows down). 
+ * + * Higher address : Previous stack frame : + * +===========================+ <--- MIPS sp before call + * | Callee-saved registers, | + * | including RA and FP | + * +---------------------------+ <--- eBPF FP (MIPS fp) + * | Local eBPF variables | + * | allocated by program | + * +---------------------------+ + * | Reserved for caller-saved | + * | registers | + * Lower address +===========================+ <--- MIPS sp + */ + +/* Build program prologue to set up the stack and registers */ +void build_prologue(struct jit_context *ctx) +{ + u8 fp = bpf2mips64[BPF_REG_FP]; + u8 tc = bpf2mips64[JIT_REG_TC]; + u8 zx = bpf2mips64[JIT_REG_ZX]; + int stack, saved, locals, reserved; + + /* + * The first instruction initializes the tail call count register. + * On a tail call, the calling function jumps into the prologue + * after this instruction. + */ + emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff)); + + /* === Entry-point for tail calls === */ + + /* + * If the eBPF frame pointer and tail call count registers were + * accessed they must be preserved. Mark them as clobbered here + * to save and restore them on the stack as needed. + */ + if (ctx->accessed & BIT(BPF_REG_FP)) + clobber_reg(ctx, fp); + if (ctx->accessed & BIT(JIT_REG_TC)) + clobber_reg(ctx, tc); + if (ctx->accessed & BIT(JIT_REG_ZX)) + clobber_reg(ctx, zx); + + /* Compute the stack space needed for callee-saved registers */ + saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64); + saved = ALIGN(saved, MIPS_STACK_ALIGNMENT); + + /* Stack space used by eBPF program local data */ + locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT); + + /* + * If we are emitting function calls, reserve extra stack space for + * caller-saved registers needed by the JIT. The required space is + * computed automatically during resource usage discovery (pass 1). 
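+ *
+ * Worked example (register usage assumed): clobbering s0, s1, fp and ra
+ * gives saved = ALIGN(4 * 8, 16) = 32; an eBPF stack_depth of 40 gives
+ * locals = ALIGN(40, 16) = 48; with reserved = 16 bytes of call argument
+ * space the frame becomes stack = ALIGN(32 + 48 + 16, 16) = 96 bytes.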
+ */ + reserved = ctx->stack_used; + + /* Allocate the stack frame */ + stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT); + if (stack) + emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack); + + /* Store callee-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved); + + /* Initialize the eBPF frame pointer if accessed */ + if (ctx->accessed & BIT(BPF_REG_FP)) + emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved); + + /* Initialize the ePF JIT zero-extension register if accessed */ + if (ctx->accessed & BIT(JIT_REG_ZX)) { + emit(ctx, daddiu, zx, MIPS_R_ZERO, -1); + emit(ctx, dsrl32, zx, zx, 0); + } + + ctx->saved_size = saved; + ctx->stack_size = stack; +} + +/* Build the program epilogue to restore the stack and registers */ +void build_epilogue(struct jit_context *ctx, int dest_reg) +{ + /* Restore callee-saved registers from stack */ + pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, + ctx->stack_size - ctx->saved_size); + + /* Release the stack frame */ + if (ctx->stack_size) + emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size); + + /* Jump to return address and sign-extend the 32-bit return value */ + emit(ctx, jr, dest_reg); + emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */ +} + +/* Build one eBPF instruction */ +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx) +{ + u8 dst = bpf2mips64[insn->dst_reg]; + u8 src = bpf2mips64[insn->src_reg]; + u8 res = bpf2mips64[BPF_REG_0]; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + s32 val, rel; + u8 alu, jmp; + + switch (code) { + /* ALU operations */ + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + emit_mov_i(ctx, dst, imm); + emit_zext_ver(ctx, dst); + break; + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_zext(ctx, dst); + } else { + emit_mov_r(ctx, dst, src); + emit_zext_ver(ctx, dst); + } + break; + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + emit_sext(ctx, dst, dst); + emit_alu_i(ctx, dst, 0, BPF_NEG); + emit_zext_ver(ctx, dst); + break; + /* dst = dst & imm */ + /* dst = dst | imm */ + /* dst = dst ^ imm */ + /* dst = dst << imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_alu_i(ctx, dst, val, alu); + } + emit_zext_ver(ctx, dst); + break; + /* dst = dst >> imm */ + /* dst = dst >> imm (arithmetic) */ + /* dst = dst + imm */ + /* dst = dst - imm */ + /* dst = dst * imm */ + /* dst = dst / imm */ + /* dst = dst % imm */ + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_sext(ctx, dst, dst); + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_sext(ctx, dst, dst); + emit_alu_i(ctx, dst, val, alu); + } + emit_zext_ver(ctx, dst); + break; + /* dst = dst & src */ + /* dst = dst | src */ + /* dst = dst ^ src */ + /* dst = dst << src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_LSH | 
BPF_X: + emit_alu_r(ctx, dst, src, BPF_OP(code)); + emit_zext_ver(ctx, dst); + break; + /* dst = dst >> src */ + /* dst = dst >> src (arithmetic) */ + /* dst = dst + src */ + /* dst = dst - src */ + /* dst = dst * src */ + /* dst = dst / src */ + /* dst = dst % src */ + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: + emit_sext(ctx, dst, dst); + emit_sext(ctx, MIPS_R_T4, src); + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); + emit_zext_ver(ctx, dst); + break; + /* dst = imm (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_mov_i(ctx, dst, imm); + break; + /* dst = src (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_X: + emit_mov_r(ctx, dst, src); + break; + /* dst = -dst (64-bit) */ + case BPF_ALU64 | BPF_NEG: + emit_alu_i64(ctx, dst, 0, BPF_NEG); + break; + /* dst = dst & imm (64-bit) */ + /* dst = dst | imm (64-bit) */ + /* dst = dst ^ imm (64-bit) */ + /* dst = dst << imm (64-bit) */ + /* dst = dst >> imm (64-bit) */ + /* dst = dst >> imm ((64-bit, arithmetic) */ + /* dst = dst + imm (64-bit) */ + /* dst = dst - imm (64-bit) */ + /* dst = dst * imm (64-bit) */ + /* dst = dst / imm (64-bit) */ + /* dst = dst % imm (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_alu_i64(ctx, dst, val, alu); + } + break; + /* dst = dst & src (64-bit) */ + /* dst = dst | src (64-bit) */ + /* dst = dst ^ src (64-bit) */ + /* dst = dst << src (64-bit) */ + /* dst = dst >> src (64-bit) */ + /* dst = dst >> src (64-bit, arithmetic) */ + /* dst = dst + src (64-bit) */ + /* dst = dst - src (64-bit) */ + /* dst = dst * src (64-bit) */ + /* dst = dst / src (64-bit) */ + /* dst = dst % src (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_alu_r64(ctx, dst, src, BPF_OP(code)); + break; + /* dst = htole(dst) */ + /* dst = htobe(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU | BPF_END | BPF_FROM_BE: + if (BPF_SRC(code) == +#ifdef __BIG_ENDIAN + BPF_FROM_LE +#else + BPF_FROM_BE +#endif + ) + emit_bswap_r64(ctx, dst, imm); + else + emit_trunc_r64(ctx, dst, imm); + break; + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32)); + return 1; + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_DW: + emit_ldx(ctx, dst, src, off, BPF_SIZE(code)); + break; + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | 
BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code)); + break; + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_DW: + emit_stx(ctx, dst, src, off, BPF_SIZE(code)); + break; + /* Speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* Atomics */ + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + if (BPF_SIZE(code) == BPF_DW) { + emit_atomic_r64(ctx, dst, src, off, imm); + } else if (imm & BPF_FETCH) { + u8 tmp = dst; + + if (src == dst) { /* Don't overwrite dst */ + emit_mov_r(ctx, MIPS_R_T4, dst); + tmp = MIPS_R_T4; + } + emit_sext(ctx, src, src); + emit_atomic_r(ctx, tmp, src, off, imm); + emit_zext_ver(ctx, src); + } else { /* 32-bit, no fetch */ + emit_sext(ctx, MIPS_R_T4, src); + emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm); + } + break; + case BPF_CMPXCHG: + if (BPF_SIZE(code) == BPF_DW) { + emit_cmpxchg_r64(ctx, dst, src, off); + } else { + u8 tmp = res; + + if (res == dst) /* Don't overwrite dst */ + tmp = MIPS_R_T4; + emit_sext(ctx, tmp, res); + emit_sext(ctx, MIPS_R_T5, src); + emit_cmpxchg_r(ctx, dst, MIPS_R_T5, tmp, off); + if (res == dst) /* Restore result */ + emit_mov_r(ctx, res, MIPS_R_T4); + /* Result zext inserted by verifier */ + } + break; + default: + goto notyet; + } + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */ + emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */ + emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + 
case BPF_JMP32 | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel); + emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */ + if (valid_jmp_i(jmp, imm)) { + emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp); + } else { + /* Move large immediate to register, sign-extended */ + emit_mov_i(ctx, MIPS_R_T5, imm); + emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp); + } + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_jmp_r(ctx, dst, src, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel); + if (valid_jmp_i(jmp, imm)) { + emit_jmp_i(ctx, dst, imm, rel, jmp); + } else { + /* Move large immediate to register */ + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp); + } + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off */ + case BPF_JMP | BPF_JA: + if (off == 0) + break; + if (emit_ja(ctx, off) < 0) + goto toofar; + break; + /* Tail call */ + case BPF_JMP | BPF_TAIL_CALL: + if (emit_tail_call(ctx) < 0) + goto invalid; + break; + /* Function call */ + case BPF_JMP | BPF_CALL: + if (emit_call(ctx, insn) < 0) + goto invalid; + break; + /* Function return */ + case BPF_JMP | BPF_EXIT: + /* + * Optimization: when last instruction is EXIT + * simply continue to epilogue. 
+ */ + if (ctx->bpf_index == ctx->program->len - 1) + break; + if (emit_exit(ctx) < 0) + goto toofar; + break; + + default: +invalid: + pr_err_once("unknown opcode %02x\n", code); + return -EINVAL; +notyet: + pr_info_once("*** NOT YET: opcode %02x ***\n", code); + return -EFAULT; +toofar: + pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n", + ctx->bpf_index, code); + return -E2BIG; + } + return 0; +} From 72570224bb8fecdb17c2f0ccebf02868d2513595 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 5 Oct 2021 18:54:06 +0200 Subject: [PATCH 019/181] mips, bpf: Add JIT workarounds for CPU errata This patch adds workarounds for the following CPU errata to the MIPS eBPF JIT, if enabled in the kernel configuration. - R10000 ll/sc weak ordering - Loongson-3 ll/sc weak ordering - Loongson-2F jump hang The Loongson-2F nop errata is implemented in uasm, which the JIT uses, so no additional mitigations are needed for that. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Reviewed-by: Jiaxun Yang Link: https://lore.kernel.org/bpf/20211005165408.2305108-6-johan.almbladh@anyfinetworks.com --- arch/mips/net/bpf_jit_comp.c | 6 ++++-- arch/mips/net/bpf_jit_comp.h | 26 +++++++++++++++++++++++++- arch/mips/net/bpf_jit_comp64.c | 10 ++++++---- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c index 7eb95fc577107..b17130d510d49 100644 --- a/arch/mips/net/bpf_jit_comp.c +++ b/arch/mips/net/bpf_jit_comp.c @@ -404,6 +404,7 @@ void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op) /* Atomic read-modify-write (32-bit) */ void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) { + LLSC_sync(ctx); emit(ctx, ll, MIPS_R_T9, off, dst); switch (code) { case BPF_ADD: @@ -427,7 +428,7 @@ void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) break; } emit(ctx, sc, MIPS_R_T8, off, dst); - emit(ctx, beqz, MIPS_R_T8, -16); + emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset); emit(ctx, nop); /* Delay slot */ if (code & BPF_FETCH) { @@ -439,11 +440,12 @@ void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) /* Atomic compare-and-exchange (32-bit) */ void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off) { + LLSC_sync(ctx); emit(ctx, ll, MIPS_R_T9, off, dst); emit(ctx, bne, MIPS_R_T9, res, 12); emit(ctx, move, MIPS_R_T8, src); /* Delay slot */ emit(ctx, sc, MIPS_R_T8, off, dst); - emit(ctx, beqz, MIPS_R_T8, -20); + emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset); emit(ctx, move, res, MIPS_R_T9); /* Delay slot */ clobber_reg(ctx, res); } diff --git a/arch/mips/net/bpf_jit_comp.h b/arch/mips/net/bpf_jit_comp.h index 44787cf377dd0..6f3a7b07294b8 100644 --- a/arch/mips/net/bpf_jit_comp.h +++ b/arch/mips/net/bpf_jit_comp.h @@ -87,7 +87,7 @@ struct jit_context { }; /* Emit the instruction if the JIT memory space has been allocated */ -#define emit(ctx, func, ...) \ +#define __emit(ctx, func, ...) \ do { \ if ((ctx)->target != NULL) { \ u32 *p = &(ctx)->target[ctx->jit_index]; \ @@ -95,6 +95,30 @@ do { \ } \ (ctx)->jit_index++; \ } while (0) +#define emit(...) 
__emit(__VA_ARGS__) + +/* Workaround for R10000 ll/sc errata */ +#ifdef CONFIG_WAR_R10000 +#define LLSC_beqz beqzl +#else +#define LLSC_beqz beqz +#endif + +/* Workaround for Loongson-3 ll/sc errata */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS +#define LLSC_sync(ctx) emit(ctx, sync, 0) +#define LLSC_offset 4 +#else +#define LLSC_sync(ctx) +#define LLSC_offset 0 +#endif + +/* Workaround for Loongson-2F jump errata */ +#ifdef CONFIG_CPU_JUMP_WORKAROUNDS +#define JALR_MASK 0xffffffffcfffffffULL +#else +#define JALR_MASK (~0ULL) +#endif /* * Mark a BPF register as accessed, it needs to be diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c index ca49d3ef7ff49..1f1f7b87f213e 100644 --- a/arch/mips/net/bpf_jit_comp64.c +++ b/arch/mips/net/bpf_jit_comp64.c @@ -375,6 +375,7 @@ static void emit_atomic_r64(struct jit_context *ctx, u8 t1 = MIPS_R_T6; u8 t2 = MIPS_R_T7; + LLSC_sync(ctx); emit(ctx, lld, t1, off, dst); switch (code) { case BPF_ADD: @@ -398,7 +399,7 @@ static void emit_atomic_r64(struct jit_context *ctx, break; } emit(ctx, scd, t2, off, dst); - emit(ctx, beqz, t2, -16); + emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset); emit(ctx, nop); /* Delay slot */ if (code & BPF_FETCH) { @@ -414,12 +415,13 @@ static void emit_cmpxchg_r64(struct jit_context *ctx, u8 dst, u8 src, s16 off) u8 t1 = MIPS_R_T6; u8 t2 = MIPS_R_T7; + LLSC_sync(ctx); emit(ctx, lld, t1, off, dst); emit(ctx, bne, t1, r0, 12); emit(ctx, move, t2, src); /* Delay slot */ emit(ctx, scd, t2, off, dst); - emit(ctx, beqz, t2, -20); - emit(ctx, move, r0, t1); /* Delay slot */ + emit(ctx, LLSC_beqz, t2, -20 - LLSC_offset); + emit(ctx, move, r0, t1); /* Delay slot */ clobber_reg(ctx, r0); } @@ -443,7 +445,7 @@ static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); /* Emit function call */ - emit_mov_i64(ctx, tmp, addr); + emit_mov_i64(ctx, tmp, addr & JALR_MASK); emit(ctx, jalr, MIPS_R_RA, tmp); emit(ctx, nop); /* Delay slot */ From 01bdc58e94b46b88d4921f46f423bdeb8b137f28 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 5 Oct 2021 18:54:07 +0200 Subject: [PATCH 020/181] mips, bpf: Enable eBPF JITs This patch enables the new eBPF JITs for 32-bit and 64-bit MIPS. It also disables the old cBPF JIT to so cBPF programs are converted to use the new JIT. Workarounds for R4000 CPU errata are not implemented by the JIT, so the JIT is disabled if any of those workarounds are configured. 
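For example, on a target where the new JIT is built in, it can be exercised at runtime with the standard BPF knobs (assuming CONFIG_TEST_BPF is enabled; with CONFIG_BPF_JIT_ALWAYS_ON the JIT is already active):

  # echo 1 > /proc/sys/net/core/bpf_jit_enable
  # modprobe test_bpf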
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211005165408.2305108-7-johan.almbladh@anyfinetworks.com --- MAINTAINERS | 1 + arch/mips/Kconfig | 6 ++++-- arch/mips/net/Makefile | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 76e0fdcdd8776..407cd42359957 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3422,6 +3422,7 @@ S: Supported F: arch/arm64/net/ BPF JIT for MIPS (32-BIT AND 64-BIT) +M: Johan Almbladh M: Paul Burton L: netdev@vger.kernel.org L: bpf@vger.kernel.org diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 771ca53af06d2..38468f47aa5e9 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -57,7 +57,6 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS select HAVE_CONTEXT_TRACKING select HAVE_TIF_NOHZ select HAVE_C_RECORDMCOUNT @@ -65,7 +64,10 @@ config MIPS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE - select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 + select HAVE_EBPF_JIT if !CPU_MICROMIPS && \ + !CPU_DADDI_WORKAROUNDS && \ + !CPU_R4000_WORKAROUNDS && \ + !CPU_R4400_WORKAROUNDS select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index e057ee4ba75e8..602bf242b13fd 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -2,9 +2,10 @@ # MIPS networking code obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o +obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o ifeq ($(CONFIG_32BIT),y) - obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o + obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o else - obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o + obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o endif From ebcbacfa50ecd7a828f40fefbb58d641f265da0d Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 5 Oct 2021 18:54:08 +0200 Subject: [PATCH 021/181] mips, bpf: Remove old BPF JIT implementations This patch removes the old 32-bit cBPF and 64-bit eBPF JIT implementations. They are replaced by a new eBPF implementation that supports both 32-bit and 64-bit MIPS CPUs. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211005165408.2305108-8-johan.almbladh@anyfinetworks.com --- arch/mips/net/bpf_jit.c | 1299 ----------------------- arch/mips/net/bpf_jit.h | 81 -- arch/mips/net/bpf_jit_asm.S | 285 ------ arch/mips/net/ebpf_jit.c | 1938 ----------------------------------- 4 files changed, 3603 deletions(-) delete mode 100644 arch/mips/net/bpf_jit.c delete mode 100644 arch/mips/net/bpf_jit.h delete mode 100644 arch/mips/net/bpf_jit_asm.S delete mode 100644 arch/mips/net/ebpf_jit.c diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c deleted file mode 100644 index cb6d22439f71b..0000000000000 --- a/arch/mips/net/bpf_jit.c +++ /dev/null @@ -1,1299 +0,0 @@ -/* - * Just-In-Time compiler for BPF filters on MIPS - * - * Copyright (c) 2014 Imagination Technologies Ltd. - * Author: Markos Chandras - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bpf_jit.h" - -/* ABI - * r_skb_hl SKB header length - * r_data SKB data pointer - * r_off Offset - * r_A BPF register A - * r_X BPF register X - * r_skb *skb - * r_M *scratch memory - * r_skb_len SKB length - * - * On entry (*bpf_func)(*skb, *filter) - * a0 = MIPS_R_A0 = skb; - * a1 = MIPS_R_A1 = filter; - * - * Stack - * ... - * M[15] - * M[14] - * M[13] - * ... - * M[0] <-- r_M - * saved reg k-1 - * saved reg k-2 - * ... - * saved reg 0 <-- r_sp - * - * - * Packet layout - * - * <--------------------- len ------------------------> - * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> - * ---------------------------------------------------- - * | skb->data | - * ---------------------------------------------------- - */ - -#define ptr typeof(unsigned long) - -#define SCRATCH_OFF(k) (4 * (k)) - -/* JIT flags */ -#define SEEN_CALL (1 << BPF_MEMWORDS) -#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) -#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) -#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) -#define SEEN_OFF SEEN_SREG(2) -#define SEEN_A SEEN_SREG(3) -#define SEEN_X SEEN_SREG(4) -#define SEEN_SKB SEEN_SREG(5) -#define SEEN_MEM SEEN_SREG(6) -/* SEEN_SK_DATA also implies skb_hl an skb_len */ -#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) - -/* Arguments used by JIT */ -#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ - -#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ - -/** - * struct jit_ctx - JIT context - * @skf: The sk_filter - * @prologue_bytes: Number of bytes for prologue - * @idx: Instruction index - * @flags: JIT flags - * @offsets: Instruction offsets - * @target: Memory location for the compiled filter - */ -struct jit_ctx { - const struct bpf_prog *skf; - unsigned int prologue_bytes; - u32 idx; - u32 flags; - u32 *offsets; - u32 *target; -}; - - -static inline int optimize_div(u32 *k) -{ - /* power of 2 divides can be implemented with right shift */ - if (!(*k & (*k-1))) { - *k = ilog2(*k); - return 1; - } - - return 0; -} - -static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); - -/* Simply emit the instruction if the JIT memory space has been allocated */ -#define emit_instr(ctx, func, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - uasm_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -/* - * Similar to emit_instr but it must be used when we need to emit - * 32-bit or 64-bit instructions - */ -#define emit_long_instr(ctx, func, ...) 
\ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - UASM_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -/* Determine if immediate is within the 16-bit signed range */ -static inline bool is_range16(s32 imm) -{ - return !(imm >= SBIT(15) || imm < -SBIT(15)); -} - -static inline void emit_addu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, addu, dst, src1, src2); -} - -static inline void emit_nop(struct jit_ctx *ctx) -{ - emit_instr(ctx, nop); -} - -/* Load a u32 immediate to a register */ -static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - /* addiu can only handle s16 */ - if (!is_range16(imm)) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); - p = &ctx->target[ctx->idx + 1]; - uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); - } else { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_addiu(&p, dst, r_zero, imm); - } - } - ctx->idx++; - - if (!is_range16(imm)) - ctx->idx++; -} - -static inline void emit_or(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, or, dst, src1, src2); -} - -static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, - struct jit_ctx *ctx) -{ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_or(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, ori, dst, src, imm); - } -} - -static inline void emit_daddiu(unsigned int dst, unsigned int src, - int imm, struct jit_ctx *ctx) -{ - /* - * Only used for stack, so the imm is relatively small - * and it fits in 15-bits - */ - emit_instr(ctx, daddiu, dst, src, imm); -} - -static inline void emit_addiu(unsigned int dst, unsigned int src, - u32 imm, struct jit_ctx *ctx) -{ - if (!is_range16(imm)) { - emit_load_imm(r_tmp, imm, ctx); - emit_addu(dst, r_tmp, src, ctx); - } else { - emit_instr(ctx, addiu, dst, src, imm); - } -} - -static inline void emit_and(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, and, dst, src1, src2); -} - -static inline void emit_andi(unsigned int dst, unsigned int src, - u32 imm, struct jit_ctx *ctx) -{ - /* If imm does not fit in u16 then load it to register */ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_and(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, andi, dst, src, imm); - } -} - -static inline void emit_xor(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, xor, dst, src1, src2); -} - -static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) -{ - /* If imm does not fit in u16 then load it to register */ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_xor(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, xori, dst, src, imm); - } -} - -static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) -{ - emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); -} - -static inline void emit_subu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, subu, dst, src1, src2); -} - -static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) -{ - emit_subu(reg, r_zero, reg, ctx); -} - -static inline void emit_sllv(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, sllv, dst, src, sa); -} - -static inline void emit_sll(unsigned int dst, unsigned int src, - unsigned int sa, 
struct jit_ctx *ctx) -{ - /* sa is 5-bits long */ - if (sa >= BIT(5)) - /* Shifting >= 32 results in zero */ - emit_jit_reg_move(dst, r_zero, ctx); - else - emit_instr(ctx, sll, dst, src, sa); -} - -static inline void emit_srlv(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, srlv, dst, src, sa); -} - -static inline void emit_srl(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - /* sa is 5-bits long */ - if (sa >= BIT(5)) - /* Shifting >= 32 results in zero */ - emit_jit_reg_move(dst, r_zero, ctx); - else - emit_instr(ctx, srl, dst, src, sa); -} - -static inline void emit_slt(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, slt, dst, src1, src2); -} - -static inline void emit_sltu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, sltu, dst, src1, src2); -} - -static inline void emit_sltiu(unsigned dst, unsigned int src, - unsigned int imm, struct jit_ctx *ctx) -{ - /* 16 bit immediate */ - if (!is_range16((s32)imm)) { - emit_load_imm(r_tmp, imm, ctx); - emit_sltu(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, sltiu, dst, src, imm); - } - -} - -/* Store register on the stack */ -static inline void emit_store_stack_reg(ptr reg, ptr base, - unsigned int offset, - struct jit_ctx *ctx) -{ - emit_long_instr(ctx, SW, reg, offset, base); -} - -static inline void emit_store(ptr reg, ptr base, unsigned int offset, - struct jit_ctx *ctx) -{ - emit_instr(ctx, sw, reg, offset, base); -} - -static inline void emit_load_stack_reg(ptr reg, ptr base, - unsigned int offset, - struct jit_ctx *ctx) -{ - emit_long_instr(ctx, LW, reg, offset, base); -} - -static inline void emit_load(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lw, reg, offset, base); -} - -static inline void emit_load_byte(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lb, reg, offset, base); -} - -static inline void emit_half_load(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lh, reg, offset, base); -} - -static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lhu, reg, offset, base); -} - -static inline void emit_mul(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, mul, dst, src1, src2); -} - -static inline void emit_div(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_divu(&p, dst, src); - p = &ctx->target[ctx->idx + 1]; - uasm_i_mflo(&p, dst); - } - ctx->idx += 2; /* 2 insts */ -} - -static inline void emit_mod(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_divu(&p, dst, src); - p = &ctx->target[ctx->idx + 1]; - uasm_i_mfhi(&p, dst); - } - ctx->idx += 2; /* 2 insts */ -} - -static inline void emit_dsll(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, dsll, dst, src, sa); -} - -static inline void emit_dsrl32(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, dsrl32, dst, src, sa); -} - -static inline void emit_wsbh(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - emit_instr(ctx, 
wsbh, dst, src); -} - -/* load pointer to register */ -static inline void emit_load_ptr(unsigned int dst, unsigned int src, - int imm, struct jit_ctx *ctx) -{ - /* src contains the base addr of the 32/64-pointer */ - emit_long_instr(ctx, LW, dst, imm, src); -} - -/* load a function pointer to register */ -static inline void emit_load_func(unsigned int reg, ptr imm, - struct jit_ctx *ctx) -{ - if (IS_ENABLED(CONFIG_64BIT)) { - /* At this point imm is always 64-bit */ - emit_load_imm(r_tmp, (u64)imm >> 32, ctx); - emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ - emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); - emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ - emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); - } else { - emit_load_imm(reg, imm, ctx); - } -} - -/* Move to real MIPS register */ -static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) -{ - emit_long_instr(ctx, ADDU, dst, src, r_zero); -} - -/* Move to JIT (32-bit) register */ -static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) -{ - emit_addu(dst, src, r_zero, ctx); -} - -/* Compute the immediate value for PC-relative branches. */ -static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) -{ - if (ctx->target == NULL) - return 0; - - /* - * We want a pc-relative branch. We only do forward branches - * so tgt is always after pc. tgt is the instruction offset - * we want to jump to. - - * Branch on MIPS: - * I: target_offset <- sign_extend(offset) - * I+1: PC += target_offset (delay slot) - * - * ctx->idx currently points to the branch instruction - * but the offset is added to the delay slot so we need - * to subtract 4. - */ - return ctx->offsets[tgt] - - (ctx->idx * 4 - ctx->prologue_bytes) - 4; -} - -static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, - unsigned int imm, struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - - switch (cond) { - case MIPS_COND_EQ: - uasm_i_beq(&p, reg1, reg2, imm); - break; - case MIPS_COND_NE: - uasm_i_bne(&p, reg1, reg2, imm); - break; - case MIPS_COND_ALL: - uasm_i_b(&p, imm); - break; - default: - pr_warn("%s: Unhandled branch conditional: %d\n", - __func__, cond); - } - } - ctx->idx++; -} - -static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) -{ - emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); -} - -static inline void emit_jalr(unsigned int link, unsigned int reg, - struct jit_ctx *ctx) -{ - emit_instr(ctx, jalr, link, reg); -} - -static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) -{ - emit_instr(ctx, jr, reg); -} - -static inline u16 align_sp(unsigned int num) -{ - /* Double word alignment for 32-bit, quadword for 64-bit */ - unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 
16 : 8; - num = (num + (align - 1)) & -align; - return num; -} - -static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) -{ - int i = 0, real_off = 0; - u32 sflags, tmp_flags; - - /* Adjust the stack pointer */ - if (offset) - emit_stack_offset(-align_sp(offset), ctx); - - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; - /* sflags is essentially a bitmap */ - while (tmp_flags) { - if ((sflags >> i) & 0x1) { - emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, - ctx); - real_off += SZREG; - } - i++; - tmp_flags >>= 1; - } - - /* save return address */ - if (ctx->flags & SEEN_CALL) { - emit_store_stack_reg(r_ra, r_sp, real_off, ctx); - real_off += SZREG; - } - - /* Setup r_M leaving the alignment gap if necessary */ - if (ctx->flags & SEEN_MEM) { - if (real_off % (SZREG * 2)) - real_off += SZREG; - emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); - } -} - -static void restore_bpf_jit_regs(struct jit_ctx *ctx, - unsigned int offset) -{ - int i, real_off = 0; - u32 sflags, tmp_flags; - - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; - /* sflags is a bitmap */ - i = 0; - while (tmp_flags) { - if ((sflags >> i) & 0x1) { - emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, - ctx); - real_off += SZREG; - } - i++; - tmp_flags >>= 1; - } - - /* restore return address */ - if (ctx->flags & SEEN_CALL) - emit_load_stack_reg(r_ra, r_sp, real_off, ctx); - - /* Restore the sp and discard the scrach memory */ - if (offset) - emit_stack_offset(align_sp(offset), ctx); -} - -static unsigned int get_stack_depth(struct jit_ctx *ctx) -{ - int sp_off = 0; - - - /* How may s* regs do we need to preserved? */ - sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; - - if (ctx->flags & SEEN_MEM) - sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ - - if (ctx->flags & SEEN_CALL) - sp_off += SZREG; /* Space for our ra register */ - - return sp_off; -} - -static void build_prologue(struct jit_ctx *ctx) -{ - int sp_off; - - /* Calculate the total offset for the stack pointer */ - sp_off = get_stack_depth(ctx); - save_bpf_jit_regs(ctx, sp_off); - - if (ctx->flags & SEEN_SKB) - emit_reg_move(r_skb, MIPS_R_A0, ctx); - - if (ctx->flags & SEEN_SKB_DATA) { - /* Load packet length */ - emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), - ctx); - emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), - ctx); - /* Load the data pointer */ - emit_load_ptr(r_skb_data, r_skb, - offsetof(struct sk_buff, data), ctx); - /* Load the header length */ - emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); - } - - if (ctx->flags & SEEN_X) - emit_jit_reg_move(r_X, r_zero, ctx); - - /* - * Do not leak kernel data to userspace, we only need to clear - * r_A if it is ever used. In fact if it is never used, we - * will not save/restore it, so clearing it in this case would - * corrupt the state of the caller. - */ - if (bpf_needs_clear_a(&ctx->skf->insns[0]) && - (ctx->flags & SEEN_A)) - emit_jit_reg_move(r_A, r_zero, ctx); -} - -static void build_epilogue(struct jit_ctx *ctx) -{ - unsigned int sp_off; - - /* Calculate the total offset for the stack pointer */ - - sp_off = get_stack_depth(ctx); - restore_bpf_jit_regs(ctx, sp_off); - - /* Return */ - emit_jr(r_ra, ctx); - emit_nop(ctx); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative : func) : \ - func##_positive) - -static bool is_bad_offset(int b_off) -{ - return b_off > 0x1ffff || b_off < -0x20000; -} - -static int build_body(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - const struct sock_filter *inst; - unsigned int i, off, condt; - u32 k, b_off __maybe_unused; - u8 (*sk_load_func)(unsigned long *skb, int offset); - - for (i = 0; i < prog->len; i++) { - u16 code; - - inst = &(prog->insns[i]); - pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", - __func__, inst->code, inst->jt, inst->jf, inst->k); - k = inst->k; - code = bpf_anc_helper(inst); - - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - switch (code) { - case BPF_LD | BPF_IMM: - /* A <- k ==> li r_A, k */ - ctx->flags |= SEEN_A; - emit_load_imm(r_A, k, ctx); - break; - case BPF_LD | BPF_W | BPF_LEN: - BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); - /* A <- len ==> lw r_A, offset(skb) */ - ctx->flags |= SEEN_SKB | SEEN_A; - off = offsetof(struct sk_buff, len); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_LD | BPF_MEM: - /* A <- M[k] ==> lw r_A, offset(M) */ - ctx->flags |= SEEN_MEM | SEEN_A; - emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_LD | BPF_W | BPF_ABS: - /* A <- P[k:4] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); - goto load; - case BPF_LD | BPF_H | BPF_ABS: - /* A <- P[k:2] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); - goto load; - case BPF_LD | BPF_B | BPF_ABS: - /* A <- P[k:1] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); -load: - emit_load_imm(r_off, k, ctx); -load_common: - ctx->flags |= SEEN_CALL | SEEN_OFF | - SEEN_SKB | SEEN_A | SEEN_SKB_DATA; - - emit_load_func(r_s0, (ptr)sk_load_func, ctx); - emit_reg_move(MIPS_R_A0, r_skb, ctx); - emit_jalr(MIPS_R_RA, r_s0, ctx); - /* Load second argument to delay slot */ - emit_reg_move(MIPS_R_A1, r_off, ctx); - /* Check the error value */ - emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), - ctx); - /* Load return register on DS for failures */ - emit_reg_move(r_ret, r_zero, ctx); - /* Return with error */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_LD | BPF_W | BPF_IND: - /* A <- P[X + k:4] */ - sk_load_func = sk_load_word; - goto load_ind; - case BPF_LD | BPF_H | BPF_IND: - /* A <- P[X + k:2] */ - sk_load_func = sk_load_half; - goto load_ind; - case BPF_LD | BPF_B | BPF_IND: - /* A <- P[X + k:1] */ - sk_load_func = sk_load_byte; -load_ind: - ctx->flags |= SEEN_OFF | SEEN_X; - emit_addiu(r_off, r_X, k, ctx); - goto load_common; - case BPF_LDX | BPF_IMM: - /* X <- k */ - ctx->flags |= SEEN_X; - emit_load_imm(r_X, k, ctx); - break; - case BPF_LDX | BPF_MEM: - /* X <- M[k] */ - ctx->flags |= SEEN_X | SEEN_MEM; - emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_LDX | BPF_W | BPF_LEN: - /* X <- len */ - ctx->flags |= SEEN_X | SEEN_SKB; - off = offsetof(struct sk_buff, len); - emit_load(r_X, r_skb, off, ctx); - break; - case BPF_LDX | BPF_B | BPF_MSH: - /* X <- 4 * (P[k:1] & 0xf) */ - ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; - /* Load offset to a1 */ - emit_load_func(r_s0, (ptr)sk_load_byte, ctx); - /* - * This may emit two instructions so it may not fit - * in the delay slot. So use a0 in the delay slot. 
- */ - emit_load_imm(MIPS_R_A1, k, ctx); - emit_jalr(MIPS_R_RA, r_s0, ctx); - emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ - /* Check the error value */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); - emit_reg_move(r_ret, r_zero, ctx); - /* We are good */ - /* X <- P[1:K] & 0xf */ - emit_andi(r_X, r_A, 0xf, ctx); - /* X << 2 */ - emit_b(b_imm(i + 1, ctx), ctx); - emit_sll(r_X, r_X, 2, ctx); /* delay slot */ - break; - case BPF_ST: - /* M[k] <- A */ - ctx->flags |= SEEN_MEM | SEEN_A; - emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_STX: - /* M[k] <- X */ - ctx->flags |= SEEN_MEM | SEEN_X; - emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_ALU | BPF_ADD | BPF_K: - /* A += K */ - ctx->flags |= SEEN_A; - emit_addiu(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_ADD | BPF_X: - /* A += X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_addu(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_SUB | BPF_K: - /* A -= K */ - ctx->flags |= SEEN_A; - emit_addiu(r_A, r_A, -k, ctx); - break; - case BPF_ALU | BPF_SUB | BPF_X: - /* A -= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_subu(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_MUL | BPF_K: - /* A *= K */ - /* Load K to scratch register before MUL */ - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_mul(r_A, r_A, r_s0, ctx); - break; - case BPF_ALU | BPF_MUL | BPF_X: - /* A *= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_mul(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_DIV | BPF_K: - /* A /= k */ - if (k == 1) - break; - if (optimize_div(&k)) { - ctx->flags |= SEEN_A; - emit_srl(r_A, r_A, k, ctx); - break; - } - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_div(r_A, r_s0, ctx); - break; - case BPF_ALU | BPF_MOD | BPF_K: - /* A %= k */ - if (k == 1) { - ctx->flags |= SEEN_A; - emit_jit_reg_move(r_A, r_zero, ctx); - } else { - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_mod(r_A, r_s0, ctx); - } - break; - case BPF_ALU | BPF_DIV | BPF_X: - /* A /= X */ - ctx->flags |= SEEN_X | SEEN_A; - /* Check if r_X is zero */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ - emit_div(r_A, r_X, ctx); - break; - case BPF_ALU | BPF_MOD | BPF_X: - /* A %= X */ - ctx->flags |= SEEN_X | SEEN_A; - /* Check if r_X is zero */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ - emit_mod(r_A, r_X, ctx); - break; - case BPF_ALU | BPF_OR | BPF_K: - /* A |= K */ - ctx->flags |= SEEN_A; - emit_ori(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_OR | BPF_X: - /* A |= X */ - ctx->flags |= SEEN_A; - emit_ori(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_XOR | BPF_K: - /* A ^= k */ - ctx->flags |= SEEN_A; - emit_xori(r_A, r_A, k, ctx); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: - /* A ^= X */ - ctx->flags |= SEEN_A; - emit_xor(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_AND | BPF_K: - /* A &= K */ - ctx->flags |= SEEN_A; - emit_andi(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_AND | BPF_X: - /* A &= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_and(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_LSH | BPF_K: - /* A <<= K */ - ctx->flags |= SEEN_A; - emit_sll(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_LSH 
| BPF_X: - /* A <<= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_sllv(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_RSH | BPF_K: - /* A >>= K */ - ctx->flags |= SEEN_A; - emit_srl(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_RSH | BPF_X: - ctx->flags |= SEEN_A | SEEN_X; - emit_srlv(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_NEG: - /* A = -A */ - ctx->flags |= SEEN_A; - emit_neg(r_A, ctx); - break; - case BPF_JMP | BPF_JA: - /* pc += K */ - b_off = b_imm(i + k + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - /* pc += ( A == K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_EQ | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A == X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_EQ | MIPS_COND_X; - goto jmp_cmp; - case BPF_JMP | BPF_JGE | BPF_K: - /* pc += ( A >= K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GE | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JGE | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A >= X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GE | MIPS_COND_X; - goto jmp_cmp; - case BPF_JMP | BPF_JGT | BPF_K: - /* pc += ( A > K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GT | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JGT | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A > X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GT | MIPS_COND_X; -jmp_cmp: - /* Greater or Equal */ - if ((condt & MIPS_COND_GE) || - (condt & MIPS_COND_GT)) { - if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_A; - emit_sltiu(r_s0, r_A, k, ctx); - } else { /* X */ - ctx->flags |= SEEN_A | - SEEN_X; - emit_sltu(r_s0, r_A, r_X, ctx); - } - /* A < (K|X) ? r_scrach = 1 */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, - ctx); - emit_nop(ctx); - /* A > (K|X) ? scratch = 0 */ - if (condt & MIPS_COND_GT) { - /* Checking for equality */ - ctx->flags |= SEEN_A | SEEN_X; - if (condt & MIPS_COND_K) - emit_load_imm(r_s0, k, ctx); - else - emit_jit_reg_move(r_s0, r_X, - ctx); - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - /* Finally, A > K|X */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - } else { - /* A >= (K|X) so jump */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - } - } else { - /* A == K|X */ - if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, - ctx); - emit_bcond(MIPS_COND_NE, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - } else { /* X */ - /* jump true */ - ctx->flags |= SEEN_A | SEEN_X; - b_off = b_imm(i + inst->jt + 1, - ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_X, - b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_NE, r_A, r_X, - b_off, ctx); - emit_nop(ctx); - } - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - ctx->flags |= SEEN_A; - /* pc += (A & K) ? 
pc -> jt : pc -> jf */ - emit_load_imm(r_s1, k, ctx); - emit_and(r_s0, r_A, r_s1, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->flags |= SEEN_X | SEEN_A; - /* pc += (A & X) ? pc -> jt : pc -> jf */ - emit_and(r_s0, r_A, r_X, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_RET | BPF_A: - ctx->flags |= SEEN_A; - if (i != prog->len - 1) { - /* - * If this is not the last instruction - * then jump to the epilogue - */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - } - emit_reg_move(r_ret, r_A, ctx); /* delay slot */ - break; - case BPF_RET | BPF_K: - /* - * It can emit two instructions so it does not fit on - * the delay slot. - */ - emit_load_imm(r_ret, k, ctx); - if (i != prog->len - 1) { - /* - * If this is not the last instruction - * then jump to the epilogue - */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - emit_nop(ctx); - } - break; - case BPF_MISC | BPF_TAX: - /* X = A */ - ctx->flags |= SEEN_X | SEEN_A; - emit_jit_reg_move(r_X, r_A, ctx); - break; - case BPF_MISC | BPF_TXA: - /* A = X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_jit_reg_move(r_A, r_X, ctx); - break; - /* AUX */ - case BPF_ANC | SKF_AD_PROTOCOL: - /* A = ntohs(skb->protocol */ - ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, - protocol) != 2); - off = offsetof(struct sk_buff, protocol); - emit_half_load(r_A, r_skb, off, ctx); -#ifdef CONFIG_CPU_LITTLE_ENDIAN - /* This needs little endian fixup */ - if (cpu_has_wsbh) { - /* R2 and later have the wsbh instruction */ - emit_wsbh(r_A, r_A, ctx); - } else { - /* Get first byte */ - emit_andi(r_tmp_imm, r_A, 0xff, ctx); - /* Shift it */ - emit_sll(r_tmp, r_tmp_imm, 8, ctx); - /* Get second byte */ - emit_srl(r_tmp_imm, r_A, 8, ctx); - emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); - /* Put everyting together in r_A */ - emit_or(r_A, r_tmp, r_tmp_imm, ctx); - } -#endif - break; - case BPF_ANC | SKF_AD_CPU: - ctx->flags |= SEEN_A | SEEN_OFF; - /* A = current_thread_info()->cpu */ - BUILD_BUG_ON(sizeof_field(struct thread_info, - cpu) != 4); - off = offsetof(struct thread_info, cpu); - /* $28/gp points to the thread_info struct */ - emit_load(r_A, 28, off, ctx); - break; - case BPF_ANC | SKF_AD_IFINDEX: - /* A = skb->dev->ifindex */ - case BPF_ANC | SKF_AD_HATYPE: - /* A = skb->dev->type */ - ctx->flags |= SEEN_SKB | SEEN_A; - off = offsetof(struct sk_buff, dev); - /* Load *dev pointer */ - emit_load_ptr(r_s0, r_skb, off, ctx); - /* error (0) in the delay slot */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); - emit_reg_move(r_ret, r_zero, ctx); - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); - off = offsetof(struct net_device, ifindex); - emit_load(r_A, r_s0, off, ctx); - } else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */ - BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2); - off = offsetof(struct net_device, type); - emit_half_load_unsigned(r_A, 
r_s0, off, ctx); - } - break; - case BPF_ANC | SKF_AD_MARK: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); - off = offsetof(struct sk_buff, mark); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_RXHASH: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); - off = offsetof(struct sk_buff, hash); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, - vlan_tci) != 2); - off = offsetof(struct sk_buff, vlan_tci); - emit_half_load_unsigned(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - ctx->flags |= SEEN_SKB | SEEN_A; - emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); - if (PKT_VLAN_PRESENT_BIT) - emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); - if (PKT_VLAN_PRESENT_BIT < 7) - emit_andi(r_A, r_A, 1, ctx); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - ctx->flags |= SEEN_SKB; - - emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); - /* Keep only the last 3 bits */ - emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); -#ifdef __BIG_ENDIAN_BITFIELD - /* Get the actual packet type to the lower 3 bits */ - emit_srl(r_A, r_A, 5, ctx); -#endif - break; - case BPF_ANC | SKF_AD_QUEUE: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, - queue_mapping) != 2); - BUILD_BUG_ON(offsetof(struct sk_buff, - queue_mapping) > 0xff); - off = offsetof(struct sk_buff, queue_mapping); - emit_half_load_unsigned(r_A, r_skb, off, ctx); - break; - default: - pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, - inst->code); - return -1; - } - } - - /* compute offsets only during the first pass */ - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - return 0; -} - -void bpf_jit_compile(struct bpf_prog *fp) -{ - struct jit_ctx ctx; - unsigned int alloc_size, tmp_idx; - - if (!bpf_jit_enable) - return; - - memset(&ctx, 0, sizeof(ctx)); - - ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); - if (ctx.offsets == NULL) - return; - - ctx.skf = fp; - - if (build_body(&ctx)) - goto out; - - tmp_idx = ctx.idx; - build_prologue(&ctx); - ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; - /* just to complete the ctx.idx count */ - build_epilogue(&ctx); - - alloc_size = 4 * ctx.idx; - ctx.target = module_alloc(alloc_size); - if (ctx.target == NULL) - goto out; - - /* Clean it */ - memset(ctx.target, 0, alloc_size); - - ctx.idx = 0; - - /* Generate the actual JIT code */ - build_prologue(&ctx); - if (build_body(&ctx)) { - module_memfree(ctx.target); - goto out; - } - build_epilogue(&ctx); - - /* Update the icache */ - flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); - - if (bpf_jit_enable > 1) - /* Dump JIT code */ - bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); - - fp->bpf_func = (void *)ctx.target; - fp->jited = 1; - -out: - kfree(ctx.offsets); -} - -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) - module_memfree(fp->bpf_func); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/mips/net/bpf_jit.h b/arch/mips/net/bpf_jit.h deleted file mode 100644 index 166ca06c9da9c..0000000000000 --- a/arch/mips/net/bpf_jit.h +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Just-In-Time compiler for BPF filters on MIPS - * - * Copyright (c) 2014 Imagination Technologies Ltd. 
- * Author: Markos Chandras - */ - -#ifndef BPF_JIT_MIPS_OP_H -#define BPF_JIT_MIPS_OP_H - -/* Registers used by JIT */ -#define MIPS_R_ZERO 0 -#define MIPS_R_V0 2 -#define MIPS_R_A0 4 -#define MIPS_R_A1 5 -#define MIPS_R_T4 12 -#define MIPS_R_T5 13 -#define MIPS_R_T6 14 -#define MIPS_R_T7 15 -#define MIPS_R_S0 16 -#define MIPS_R_S1 17 -#define MIPS_R_S2 18 -#define MIPS_R_S3 19 -#define MIPS_R_S4 20 -#define MIPS_R_S5 21 -#define MIPS_R_S6 22 -#define MIPS_R_S7 23 -#define MIPS_R_SP 29 -#define MIPS_R_RA 31 - -/* Conditional codes */ -#define MIPS_COND_EQ 0x1 -#define MIPS_COND_GE (0x1 << 1) -#define MIPS_COND_GT (0x1 << 2) -#define MIPS_COND_NE (0x1 << 3) -#define MIPS_COND_ALL (0x1 << 4) -/* Conditionals on X register or K immediate */ -#define MIPS_COND_X (0x1 << 5) -#define MIPS_COND_K (0x1 << 6) - -#define r_ret MIPS_R_V0 - -/* - * Use 2 scratch registers to avoid pipeline interlocks. - * There is no overhead during epilogue and prologue since - * any of the $s0-$s6 registers will only be preserved if - * they are going to actually be used. - */ -#define r_skb_hl MIPS_R_S0 /* skb header length */ -#define r_skb_data MIPS_R_S1 /* skb actual data */ -#define r_off MIPS_R_S2 -#define r_A MIPS_R_S3 -#define r_X MIPS_R_S4 -#define r_skb MIPS_R_S5 -#define r_M MIPS_R_S6 -#define r_skb_len MIPS_R_S7 -#define r_s0 MIPS_R_T4 /* scratch reg 1 */ -#define r_s1 MIPS_R_T5 /* scratch reg 2 */ -#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ -#define r_tmp MIPS_R_T7 /* No need to preserve this */ -#define r_zero MIPS_R_ZERO -#define r_sp MIPS_R_SP -#define r_ra MIPS_R_RA - -#ifndef __ASSEMBLY__ - -/* Declare ASM helpers */ - -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func(unsigned long *skb, int offset); \ - extern u8 func##_negative(unsigned long *skb, int offset); \ - extern u8 func##_positive(unsigned long *skb, int offset) - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); - -#endif - -#endif /* BPF_JIT_MIPS_OP_H */ diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S deleted file mode 100644 index 57154c5883b6f..0000000000000 --- a/arch/mips/net/bpf_jit_asm.S +++ /dev/null @@ -1,285 +0,0 @@ -/* - * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF - * compiler. - * - * Copyright (C) 2015 Imagination Technologies Ltd. - * Author: Markos Chandras - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. - */ - -#include -#include -#include -#include "bpf_jit.h" - -/* ABI - * - * r_skb_hl skb header length - * r_skb_data skb data - * r_off(a1) offset register - * r_A BPF register A - * r_X PF register X - * r_skb(a0) *skb - * r_M *scratch memory - * r_skb_le skb length - * r_s0 Scratch register 0 - * r_s1 Scratch register 1 - * - * On entry: - * a0: *skb - * a1: offset (imm or imm + X) - * - * All non-BPF-ABI registers are free for use. On return, we only - * care about r_ret. The BPF-ABI registers are assumed to remain - * unmodified during the entire filter operation. 
- */ - -#define skb a0 -#define offset a1 -#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ - - /* We know better :) so prevent assembler reordering etc */ - .set noreorder - -#define is_offset_negative(TYPE) \ - /* If offset is negative we have more work to do */ \ - slti t0, offset, 0; \ - bgtz t0, bpf_slow_path_##TYPE##_neg; \ - /* Be careful what follows in DS. */ - -#define is_offset_in_header(SIZE, TYPE) \ - /* Reading from header? */ \ - addiu $r_s0, $r_skb_hl, -SIZE; \ - slt t0, $r_s0, offset; \ - bgtz t0, bpf_slow_path_##TYPE; \ - -LEAF(sk_load_word) - is_offset_negative(word) -FEXPORT(sk_load_word_positive) - is_offset_in_header(4, word) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - .set reorder - lw $r_A, 0(t1) - .set noreorder -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh t0, $r_A - rotr $r_A, t0, 16 -# else - sll t0, $r_A, 24 - srl t1, $r_A, 24 - srl t2, $r_A, 8 - or t0, t0, t1 - andi t2, t2, 0xff00 - andi t1, $r_A, 0xff00 - or t0, t0, t2 - sll t1, t1, 8 - or $r_A, t0, t1 -# endif -#endif - jr $r_ra - move $r_ret, zero - END(sk_load_word) - -LEAF(sk_load_half) - is_offset_negative(half) -FEXPORT(sk_load_half_positive) - is_offset_in_header(2, half) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - lhu $r_A, 0(t1) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh $r_A, $r_A -# else - sll t0, $r_A, 8 - srl t1, $r_A, 8 - andi t0, t0, 0xff00 - or $r_A, t0, t1 -# endif -#endif - jr $r_ra - move $r_ret, zero - END(sk_load_half) - -LEAF(sk_load_byte) - is_offset_negative(byte) -FEXPORT(sk_load_byte_positive) - is_offset_in_header(1, byte) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - lbu $r_A, 0(t1) - jr $r_ra - move $r_ret, zero - END(sk_load_byte) - -/* - * call skb_copy_bits: - * (prototype in linux/skbuff.h) - * - * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) - * - * o32 mandates we leave 4 spaces for argument registers in case - * the callee needs to use them. Even though we don't care about - * the argument registers ourselves, we need to allocate that space - * to remain ABI compliant since the callee may want to use that space. - * We also allocate 2 more spaces for $r_ra and our return register (*to). - * - * n64 is a bit different. The *caller* will allocate the space to preserve - * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no - * good reason but it does not matter that much really. - * - * (void *to) is returned in r_s0 - * - */ -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define DS_OFFSET(SIZE) (4 * SZREG) -#else -#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) -#endif -#define bpf_slow_path_common(SIZE) \ - /* Quick check. Are we within reasonable boundaries? */ \ - LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ - sltu $r_s0, offset, $r_s1; \ - beqz $r_s0, fault; \ - /* Load 4th argument in DS */ \ - LONG_ADDIU a3, zero, SIZE; \ - PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ - PTR_LA t0, skb_copy_bits; \ - PTR_S $r_ra, (5 * SZREG)($r_sp); \ - /* Assign low slot to a2 */ \ - PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ - jalr t0; \ - /* Reset our destination slot (DS but it's ok) */ \ - INT_S zero, (4 * SZREG)($r_sp); \ - /* \ - * skb_copy_bits returns 0 on success and -EFAULT \ - * on error. Our data live in a2. Do not bother with \ - * our data if an error has been returned. 
\ - */ \ - /* Restore our frame */ \ - PTR_L $r_ra, (5 * SZREG)($r_sp); \ - INT_L $r_s0, (4 * SZREG)($r_sp); \ - bltz v0, fault; \ - PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ - move $r_ret, zero; \ - -NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) - bpf_slow_path_common(4) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh t0, $r_s0 - jr $r_ra - rotr $r_A, t0, 16 -# else - sll t0, $r_s0, 24 - srl t1, $r_s0, 24 - srl t2, $r_s0, 8 - or t0, t0, t1 - andi t2, t2, 0xff00 - andi t1, $r_s0, 0xff00 - or t0, t0, t2 - sll t1, t1, 8 - jr $r_ra - or $r_A, t0, t1 -# endif -#else - jr $r_ra - move $r_A, $r_s0 -#endif - - END(bpf_slow_path_word) - -NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) - bpf_slow_path_common(2) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - jr $r_ra - wsbh $r_A, $r_s0 -# else - sll t0, $r_s0, 8 - andi t1, $r_s0, 0xff00 - andi t0, t0, 0xff00 - srl t1, t1, 8 - jr $r_ra - or $r_A, t0, t1 -# endif -#else - jr $r_ra - move $r_A, $r_s0 -#endif - - END(bpf_slow_path_half) - -NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) - bpf_slow_path_common(1) - jr $r_ra - move $r_A, $r_s0 - - END(bpf_slow_path_byte) - -/* - * Negative entry points - */ - .macro bpf_is_end_of_data - li t0, SKF_LL_OFF - /* Reading link layer data? */ - slt t1, offset, t0 - bgtz t1, fault - /* Be careful what follows in DS. */ - .endm -/* - * call skb_copy_bits: - * (prototype in linux/filter.h) - * - * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, - * int k, unsigned int size) - * - * see above (bpf_slow_path_common) for ABI restrictions - */ -#define bpf_negative_common(SIZE) \ - PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ - PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ - PTR_S $r_ra, (5 * SZREG)($r_sp); \ - jalr t0; \ - li a2, SIZE; \ - PTR_L $r_ra, (5 * SZREG)($r_sp); \ - /* Check return pointer */ \ - beqz v0, fault; \ - PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ - /* Preserve our pointer */ \ - move $r_s0, v0; \ - /* Set return value */ \ - move $r_ret, zero; \ - -bpf_slow_path_word_neg: - bpf_is_end_of_data -NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) - bpf_negative_common(4) - jr $r_ra - lw $r_A, 0($r_s0) - END(sk_load_word_negative) - -bpf_slow_path_half_neg: - bpf_is_end_of_data -NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) - bpf_negative_common(2) - jr $r_ra - lhu $r_A, 0($r_s0) - END(sk_load_half_negative) - -bpf_slow_path_byte_neg: - bpf_is_end_of_data -NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) - bpf_negative_common(1) - jr $r_ra - lbu $r_A, 0($r_s0) - END(sk_load_byte_negative) - -fault: - jr $r_ra - addiu $r_ret, zero, 1 diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c deleted file mode 100644 index 3a73e93757121..0000000000000 --- a/arch/mips/net/ebpf_jit.c +++ /dev/null @@ -1,1938 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Just-In-Time compiler for eBPF filters on MIPS - * - * Copyright (c) 2017 Cavium, Inc. - * - * Based on code from: - * - * Copyright (c) 2014 Imagination Technologies Ltd. 
- * Author: Markos Chandras - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Registers used by JIT */ -#define MIPS_R_ZERO 0 -#define MIPS_R_AT 1 -#define MIPS_R_V0 2 /* BPF_R0 */ -#define MIPS_R_V1 3 -#define MIPS_R_A0 4 /* BPF_R1 */ -#define MIPS_R_A1 5 /* BPF_R2 */ -#define MIPS_R_A2 6 /* BPF_R3 */ -#define MIPS_R_A3 7 /* BPF_R4 */ -#define MIPS_R_A4 8 /* BPF_R5 */ -#define MIPS_R_T4 12 /* BPF_AX */ -#define MIPS_R_T5 13 -#define MIPS_R_T6 14 -#define MIPS_R_T7 15 -#define MIPS_R_S0 16 /* BPF_R6 */ -#define MIPS_R_S1 17 /* BPF_R7 */ -#define MIPS_R_S2 18 /* BPF_R8 */ -#define MIPS_R_S3 19 /* BPF_R9 */ -#define MIPS_R_S4 20 /* BPF_TCC */ -#define MIPS_R_S5 21 -#define MIPS_R_S6 22 -#define MIPS_R_S7 23 -#define MIPS_R_T8 24 -#define MIPS_R_T9 25 -#define MIPS_R_SP 29 -#define MIPS_R_RA 31 - -/* eBPF flags */ -#define EBPF_SAVE_S0 BIT(0) -#define EBPF_SAVE_S1 BIT(1) -#define EBPF_SAVE_S2 BIT(2) -#define EBPF_SAVE_S3 BIT(3) -#define EBPF_SAVE_S4 BIT(4) -#define EBPF_SAVE_RA BIT(5) -#define EBPF_SEEN_FP BIT(6) -#define EBPF_SEEN_TC BIT(7) -#define EBPF_TCC_IN_V1 BIT(8) - -/* - * For the mips64 ISA, we need to track the value range or type for - * each JIT register. The BPF machine requires zero extended 32-bit - * values, but the mips64 ISA requires sign extended 32-bit values. - * At each point in the BPF program we track the state of every - * register so that we can zero extend or sign extend as the BPF - * semantics require. - */ -enum reg_val_type { - /* uninitialized */ - REG_UNKNOWN, - /* not known to be 32-bit compatible. */ - REG_64BIT, - /* 32-bit compatible, no truncation needed for 64-bit ops. */ - REG_64BIT_32BIT, - /* 32-bit compatible, need truncation for 64-bit ops. */ - REG_32BIT, - /* 32-bit no sign/zero extension needed. */ - REG_32BIT_POS -}; - -/* - * high bit of offsets indicates if long branch conversion done at - * this insn. - */ -#define OFFSETS_B_CONV BIT(31) - -/** - * struct jit_ctx - JIT context - * @skf: The sk_filter - * @stack_size: eBPF stack size - * @idx: Instruction index - * @flags: JIT flags - * @offsets: Instruction offsets - * @target: Memory location for the compiled filter - * @reg_val_types Packed enum reg_val_type for each register. - */ -struct jit_ctx { - const struct bpf_prog *skf; - int stack_size; - u32 idx; - u32 flags; - u32 *offsets; - u32 *target; - u64 *reg_val_types; - unsigned int long_b_conversion:1; - unsigned int gen_b_offsets:1; - unsigned int use_bbit_insns:1; -}; - -static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type) -{ - *rvt &= ~(7ull << (reg * 3)); - *rvt |= ((u64)type << (reg * 3)); -} - -static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx, - int index, int reg) -{ - return (ctx->reg_val_types[index] >> (reg * 3)) & 7; -} - -/* Simply emit the instruction if the JIT memory space has been allocated */ -#define emit_instr_long(ctx, func64, func32, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - if (IS_ENABLED(CONFIG_64BIT)) \ - uasm_i_##func64(&p, ##__VA_ARGS__); \ - else \ - uasm_i_##func32(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -#define emit_instr(ctx, func, ...) 
\ - emit_instr_long(ctx, func, func, ##__VA_ARGS__) - -static unsigned int j_target(struct jit_ctx *ctx, int target_idx) -{ - unsigned long target_va, base_va; - unsigned int r; - - if (!ctx->target) - return 0; - - base_va = (unsigned long)ctx->target; - target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV); - - if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful)) - return (unsigned int)-1; - r = target_va & 0x0ffffffful; - return r; -} - -/* Compute the immediate value for PC-relative branches. */ -static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) -{ - if (!ctx->gen_b_offsets) - return 0; - - /* - * We want a pc-relative branch. tgt is the instruction offset - * we want to jump to. - - * Branch on MIPS: - * I: target_offset <- sign_extend(offset) - * I+1: PC += target_offset (delay slot) - * - * ctx->idx currently points to the branch instruction - * but the offset is added to the delay slot so we need - * to subtract 4. - */ - return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) - - (ctx->idx * 4) - 4; -} - -enum which_ebpf_reg { - src_reg, - src_reg_no_fp, - dst_reg, - dst_reg_fp_ok -}; - -/* - * For eBPF, the register mapping naturally falls out of the - * requirements of eBPF and the MIPS n64 ABI. We don't maintain a - * separate frame pointer, so BPF_REG_10 relative accesses are - * adjusted to be $sp relative. - */ -static int ebpf_to_mips_reg(struct jit_ctx *ctx, - const struct bpf_insn *insn, - enum which_ebpf_reg w) -{ - int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? - insn->src_reg : insn->dst_reg; - - switch (ebpf_reg) { - case BPF_REG_0: - return MIPS_R_V0; - case BPF_REG_1: - return MIPS_R_A0; - case BPF_REG_2: - return MIPS_R_A1; - case BPF_REG_3: - return MIPS_R_A2; - case BPF_REG_4: - return MIPS_R_A3; - case BPF_REG_5: - return MIPS_R_A4; - case BPF_REG_6: - ctx->flags |= EBPF_SAVE_S0; - return MIPS_R_S0; - case BPF_REG_7: - ctx->flags |= EBPF_SAVE_S1; - return MIPS_R_S1; - case BPF_REG_8: - ctx->flags |= EBPF_SAVE_S2; - return MIPS_R_S2; - case BPF_REG_9: - ctx->flags |= EBPF_SAVE_S3; - return MIPS_R_S3; - case BPF_REG_10: - if (w == dst_reg || w == src_reg_no_fp) - goto bad_reg; - ctx->flags |= EBPF_SEEN_FP; - /* - * Needs special handling, return something that - * cannot be clobbered just in case. - */ - return MIPS_R_ZERO; - case BPF_REG_AX: - return MIPS_R_T4; - default: -bad_reg: - WARN(1, "Illegal bpf reg: %d\n", ebpf_reg); - return -EINVAL; - } -} -/* - * eBPF stack frame will be something like: - * - * Entry $sp ------> +--------------------------------+ - * | $ra (optional) | - * +--------------------------------+ - * | $s0 (optional) | - * +--------------------------------+ - * | $s1 (optional) | - * +--------------------------------+ - * | $s2 (optional) | - * +--------------------------------+ - * | $s3 (optional) | - * +--------------------------------+ - * | $s4 (optional) | - * +--------------------------------+ - * | tmp-storage (if $ra saved) | - * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10 - * | BPF_REG_10 relative storage | - * | MAX_BPF_STACK (optional) | - * | . | - * | . | - * | . | - * $sp --------> +--------------------------------+ - * - * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized - * area is not allocated. - */ -static int gen_int_prologue(struct jit_ctx *ctx) -{ - int stack_adjust = 0; - int store_offset; - int locals_size; - - if (ctx->flags & EBPF_SAVE_RA) - /* - * If RA we are doing a function call and may need - * extra 8-byte tmp area. 
- */ - stack_adjust += 2 * sizeof(long); - if (ctx->flags & EBPF_SAVE_S0) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S1) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S2) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S3) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S4) - stack_adjust += sizeof(long); - - BUILD_BUG_ON(MAX_BPF_STACK & 7); - locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0; - - stack_adjust += locals_size; - - ctx->stack_size = stack_adjust; - - /* - * First instruction initializes the tail call count (TCC). - * On tail call we skip this instruction, and the TCC is - * passed in $v1 from the caller. - */ - emit_instr(ctx, addiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); - if (stack_adjust) - emit_instr_long(ctx, daddiu, addiu, - MIPS_R_SP, MIPS_R_SP, -stack_adjust); - else - return 0; - - store_offset = stack_adjust - sizeof(long); - - if (ctx->flags & EBPF_SAVE_RA) { - emit_instr_long(ctx, sd, sw, - MIPS_R_RA, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S0) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S0, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S1) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S1, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S2) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S2, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S3) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S3, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S4) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S4, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - - if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1)) - emit_instr_long(ctx, daddu, addu, - MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); - - return 0; -} - -static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) -{ - const struct bpf_prog *prog = ctx->skf; - int stack_adjust = ctx->stack_size; - int store_offset = stack_adjust - sizeof(long); - enum reg_val_type td; - int r0 = MIPS_R_V0; - - if (dest_reg == MIPS_R_RA) { - /* Don't let zero extended value escape. 
*/ - td = get_reg_val_type(ctx, prog->len, BPF_REG_0); - if (td == REG_64BIT) - emit_instr(ctx, sll, r0, r0, 0); - } - - if (ctx->flags & EBPF_SAVE_RA) { - emit_instr_long(ctx, ld, lw, - MIPS_R_RA, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S0) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S0, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S1) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S1, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S2) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S2, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S3) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S3, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S4) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S4, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - emit_instr(ctx, jr, dest_reg); - - if (stack_adjust) - emit_instr_long(ctx, daddiu, addiu, - MIPS_R_SP, MIPS_R_SP, stack_adjust); - else - emit_instr(ctx, nop); - - return 0; -} - -static void gen_imm_to_reg(const struct bpf_insn *insn, int reg, - struct jit_ctx *ctx) -{ - if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { - emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm); - } else { - int lower = (s16)(insn->imm & 0xffff); - int upper = insn->imm - lower; - - emit_instr(ctx, lui, reg, upper >> 16); - emit_instr(ctx, addiu, reg, reg, lower); - } -} - -static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, - int idx) -{ - int upper_bound, lower_bound; - int dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - - if (dst < 0) - return dst; - - switch (BPF_OP(insn->code)) { - case BPF_MOV: - case BPF_ADD: - upper_bound = S16_MAX; - lower_bound = S16_MIN; - break; - case BPF_SUB: - upper_bound = -(int)S16_MIN; - lower_bound = -(int)S16_MAX; - break; - case BPF_AND: - case BPF_OR: - case BPF_XOR: - upper_bound = 0xffff; - lower_bound = 0; - break; - case BPF_RSH: - case BPF_LSH: - case BPF_ARSH: - /* Shift amounts are truncated, no need for bounds */ - upper_bound = S32_MAX; - lower_bound = S32_MIN; - break; - default: - return -EINVAL; - } - - /* - * Immediate move clobbers the register, so no sign/zero - * extension needed. 
- */ - if (BPF_CLASS(insn->code) == BPF_ALU64 && - BPF_OP(insn->code) != BPF_MOV && - get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - /* BPF_ALU | BPF_LSH doesn't need separate sign extension */ - if (BPF_CLASS(insn->code) == BPF_ALU && - BPF_OP(insn->code) != BPF_LSH && - BPF_OP(insn->code) != BPF_MOV && - get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT) - emit_instr(ctx, sll, dst, dst, 0); - - if (insn->imm >= lower_bound && insn->imm <= upper_bound) { - /* single insn immediate case */ - switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { - case BPF_ALU64 | BPF_MOV: - emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm); - break; - case BPF_ALU64 | BPF_AND: - case BPF_ALU | BPF_AND: - emit_instr(ctx, andi, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_OR: - case BPF_ALU | BPF_OR: - emit_instr(ctx, ori, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_XOR: - case BPF_ALU | BPF_XOR: - emit_instr(ctx, xori, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_ADD: - emit_instr(ctx, daddiu, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_SUB: - emit_instr(ctx, daddiu, dst, dst, -insn->imm); - break; - case BPF_ALU64 | BPF_RSH: - emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f); - break; - case BPF_ALU | BPF_RSH: - emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f); - break; - case BPF_ALU64 | BPF_LSH: - emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f); - break; - case BPF_ALU | BPF_LSH: - emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f); - break; - case BPF_ALU64 | BPF_ARSH: - emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f); - break; - case BPF_ALU | BPF_ARSH: - emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f); - break; - case BPF_ALU | BPF_MOV: - emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm); - break; - case BPF_ALU | BPF_ADD: - emit_instr(ctx, addiu, dst, dst, insn->imm); - break; - case BPF_ALU | BPF_SUB: - emit_instr(ctx, addiu, dst, dst, -insn->imm); - break; - default: - return -EINVAL; - } - } else { - /* multi insn immediate case */ - if (BPF_OP(insn->code) == BPF_MOV) { - gen_imm_to_reg(insn, dst, ctx); - } else { - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { - case BPF_ALU64 | BPF_AND: - case BPF_ALU | BPF_AND: - emit_instr(ctx, and, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_OR: - case BPF_ALU | BPF_OR: - emit_instr(ctx, or, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_XOR: - case BPF_ALU | BPF_XOR: - emit_instr(ctx, xor, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_ADD: - emit_instr(ctx, daddu, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_SUB: - emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT); - break; - case BPF_ALU | BPF_ADD: - emit_instr(ctx, addu, dst, dst, MIPS_R_AT); - break; - case BPF_ALU | BPF_SUB: - emit_instr(ctx, subu, dst, dst, MIPS_R_AT); - break; - default: - return -EINVAL; - } - } - } - - return 0; -} - -static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) -{ - if (value >= 0xffffffffffff8000ull || value < 0x8000ull) { - emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, (int)value); - } else if (value >= 0xffffffff80000000ull || - (value < 0x80000000 && value > 0xffff)) { - emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16)); - emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff)); - } else { - int i; - bool seen_part = false; - int needed_shift = 0; - - for (i = 0; i < 4; i++) { - u64 part = (value >> (16 * (3 - i))) & 0xffff; - - if (seen_part && 
needed_shift > 0 && (part || i == 3)) { - emit_instr(ctx, dsll_safe, dst, dst, needed_shift); - needed_shift = 0; - } - if (part) { - if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) { - emit_instr(ctx, lui, dst, (s32)(s16)part); - needed_shift = -16; - } else { - emit_instr(ctx, ori, dst, - seen_part ? dst : MIPS_R_ZERO, - (unsigned int)part); - } - seen_part = true; - } - if (seen_part) - needed_shift += 16; - } - } -} - -static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) -{ - int off, b_off; - int tcc_reg; - - ctx->flags |= EBPF_SEEN_TC; - /* - * if (index >= array->map.max_entries) - * goto out; - */ - off = offsetof(struct bpf_array, map.max_entries); - emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1); - emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_T5, MIPS_R_A2); - b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); - /* - * if (TCC-- < 0) - * goto out; - */ - /* Delay slot */ - tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; - emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); - b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, tcc_reg, b_off); - /* - * prog = array->ptrs[index]; - * if (prog == NULL) - * goto out; - */ - /* Delay slot */ - emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3); - emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1); - off = offsetof(struct bpf_array, ptrs); - emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8); - b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off); - /* Delay slot */ - emit_instr(ctx, nop); - - /* goto *(prog->bpf_func + 4); */ - off = offsetof(struct bpf_prog, bpf_func); - emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT); - /* All systems are go... propagate TCC */ - emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO); - /* Skip first instruction (TCC initialization) */ - emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4); - return build_int_epilogue(ctx, MIPS_R_T9); -} - -static bool is_bad_offset(int b_off) -{ - return b_off > 0x1ffff || b_off < -0x20000; -} - -/* Returns the number of insn slots consumed. 
*/ -static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, - int this_idx, int exit_idx) -{ - int src, dst, r, td, ts, mem_off, b_off; - bool need_swap, did_move, cmp_eq; - unsigned int target = 0; - u64 t64; - s64 t64s; - int bpf_op = BPF_OP(insn->code); - - if (IS_ENABLED(CONFIG_32BIT) && ((BPF_CLASS(insn->code) == BPF_ALU64) - || (bpf_op == BPF_DW))) - return -EINVAL; - - switch (insn->code) { - case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */ - case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */ - case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */ - case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */ - r = gen_imm_insn(insn, ctx, this_idx); - if (r < 0) - return r; - break; - case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - if (insn->imm == 1) /* Mult by 1 is a nop */ - break; - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, dmulu, dst, dst, MIPS_R_AT); - } else { - emit_instr(ctx, dmultu, MIPS_R_AT, dst); - emit_instr(ctx, mflo, dst); - } - break; - case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - if (insn->imm == 1) /* Mult by 1 is a nop */ - break; - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, mulu, dst, dst, MIPS_R_AT); - } else { - emit_instr(ctx, multu, dst, MIPS_R_AT); - emit_instr(ctx, mflo, dst); - } - break; - case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst); - break; - case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */ - case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */ - if (insn->imm == 0) - return -EINVAL; - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - if (insn->imm == 1) { - /* div by 1 is a nop, mod by 1 is zero */ - if (bpf_op == BPF_MOD) - emit_instr(ctx, addu, dst, 
MIPS_R_ZERO, MIPS_R_ZERO); - break; - } - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, divu_r6, dst, dst, MIPS_R_AT); - else - emit_instr(ctx, modu, dst, dst, MIPS_R_AT); - break; - } - emit_instr(ctx, divu, dst, MIPS_R_AT); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */ - if (insn->imm == 0) - return -EINVAL; - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - if (insn->imm == 1) { - /* div by 1 is a nop, mod by 1 is zero */ - if (bpf_op == BPF_MOD) - emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); - break; - } - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, ddivu_r6, dst, dst, MIPS_R_AT); - else - emit_instr(ctx, modu, dst, dst, MIPS_R_AT); - break; - } - emit_instr(ctx, ddivu, dst, MIPS_R_AT); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */ - src = ebpf_to_mips_reg(ctx, insn, src_reg); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (src < 0 || dst < 0) - return -EINVAL; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - did_move = false; - if (insn->src_reg == BPF_REG_10) { - if (bpf_op == BPF_MOV) { - emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK); - did_move = true; - } else { - emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK); - src = MIPS_R_AT; - } - } else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - int tmp_reg = MIPS_R_AT; - - if (bpf_op == BPF_MOV) { - tmp_reg = dst; - did_move = true; - } - emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO); - emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32); - src = MIPS_R_AT; - } - switch (bpf_op) { - case BPF_MOV: - if (!did_move) - emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO); - break; - case BPF_ADD: - emit_instr(ctx, daddu, dst, dst, src); - break; - case BPF_SUB: - emit_instr(ctx, dsubu, dst, dst, src); - break; - case BPF_XOR: - emit_instr(ctx, xor, dst, dst, src); - break; - case BPF_OR: - emit_instr(ctx, or, dst, dst, src); - break; - case BPF_AND: - emit_instr(ctx, and, dst, dst, src); - break; - case BPF_MUL: - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, dmulu, dst, dst, src); - } else { - emit_instr(ctx, dmultu, dst, src); - emit_instr(ctx, mflo, dst); - } - break; - case BPF_DIV: - case BPF_MOD: - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, ddivu_r6, - dst, dst, src); - else - emit_instr(ctx, modu, dst, dst, src); - break; - } - emit_instr(ctx, ddivu, dst, src); - if (bpf_op == BPF_DIV) - emit_instr(ctx, 
mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_LSH: - emit_instr(ctx, dsllv, dst, dst, src); - break; - case BPF_RSH: - emit_instr(ctx, dsrlv, dst, dst, src); - break; - case BPF_ARSH: - emit_instr(ctx, dsrav, dst, dst, src); - break; - default: - pr_err("ALU64_REG NOT HANDLED\n"); - return -EINVAL; - } - break; - case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_ARSH | BPF_X: /* ALU_REG */ - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (src < 0 || dst < 0) - return -EINVAL; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - did_move = false; - ts = get_reg_val_type(ctx, this_idx, insn->src_reg); - if (ts == REG_64BIT) { - int tmp_reg = MIPS_R_AT; - - if (bpf_op == BPF_MOV) { - tmp_reg = dst; - did_move = true; - } - /* sign extend */ - emit_instr(ctx, sll, tmp_reg, src, 0); - src = MIPS_R_AT; - } - switch (bpf_op) { - case BPF_MOV: - if (!did_move) - emit_instr(ctx, addu, dst, src, MIPS_R_ZERO); - break; - case BPF_ADD: - emit_instr(ctx, addu, dst, dst, src); - break; - case BPF_SUB: - emit_instr(ctx, subu, dst, dst, src); - break; - case BPF_XOR: - emit_instr(ctx, xor, dst, dst, src); - break; - case BPF_OR: - emit_instr(ctx, or, dst, dst, src); - break; - case BPF_AND: - emit_instr(ctx, and, dst, dst, src); - break; - case BPF_MUL: - emit_instr(ctx, mul, dst, dst, src); - break; - case BPF_DIV: - case BPF_MOD: - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, divu_r6, dst, dst, src); - else - emit_instr(ctx, modu, dst, dst, src); - break; - } - emit_instr(ctx, divu, dst, src); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_LSH: - emit_instr(ctx, sllv, dst, dst, src); - break; - case BPF_RSH: - emit_instr(ctx, srlv, dst, dst, src); - break; - case BPF_ARSH: - emit_instr(ctx, srav, dst, dst, src); - break; - default: - pr_err("ALU_REG NOT HANDLED\n"); - return -EINVAL; - } - break; - case BPF_JMP | BPF_EXIT: - if (this_idx + 1 < exit_idx) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); - emit_instr(ctx, nop); - } - break; - case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */ - cmp_eq = (bpf_op == BPF_JEQ); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - if (insn->imm == 0) { - src = MIPS_R_ZERO; - } else { - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - src = MIPS_R_AT; - } - goto jeq_common; - case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */ - case BPF_JMP | BPF_JNE | BPF_X: - case BPF_JMP | BPF_JSLT | BPF_X: - case BPF_JMP | BPF_JSLE | BPF_X: - case BPF_JMP | BPF_JSGT | BPF_X: - case BPF_JMP | BPF_JSGE | BPF_X: - case BPF_JMP | BPF_JLT | BPF_X: - case BPF_JMP | BPF_JLE | BPF_X: - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JSET | BPF_X: - src = 
ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (src < 0 || dst < 0) - return -EINVAL; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - ts = get_reg_val_type(ctx, this_idx, insn->src_reg); - if (td == REG_32BIT && ts != REG_32BIT) { - emit_instr(ctx, sll, MIPS_R_AT, src, 0); - src = MIPS_R_AT; - } else if (ts == REG_32BIT && td != REG_32BIT) { - emit_instr(ctx, sll, MIPS_R_AT, dst, 0); - dst = MIPS_R_AT; - } - if (bpf_op == BPF_JSET) { - emit_instr(ctx, and, MIPS_R_AT, dst, src); - cmp_eq = false; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) { - emit_instr(ctx, dsubu, MIPS_R_AT, dst, src); - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - if (bpf_op == BPF_JSGT) - emit_instr(ctx, blez, MIPS_R_AT, b_off); - else - emit_instr(ctx, bgtz, MIPS_R_AT, b_off); - emit_instr(ctx, nop); - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - if (bpf_op == BPF_JSGT) - emit_instr(ctx, bgtz, MIPS_R_AT, b_off); - else - emit_instr(ctx, blez, MIPS_R_AT, b_off); - emit_instr(ctx, nop); - break; - } else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) { - emit_instr(ctx, slt, MIPS_R_AT, dst, src); - cmp_eq = bpf_op == BPF_JSGE; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) { - /* dst or src could be AT */ - emit_instr(ctx, dsubu, MIPS_R_T8, dst, src); - emit_instr(ctx, sltu, MIPS_R_AT, dst, src); - /* SP known to be non-zero, movz becomes boolean not */ - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, seleqz, MIPS_R_T9, - MIPS_R_SP, MIPS_R_T8); - } else { - emit_instr(ctx, movz, MIPS_R_T9, - MIPS_R_SP, MIPS_R_T8); - emit_instr(ctx, movn, MIPS_R_T9, - MIPS_R_ZERO, MIPS_R_T8); - } - emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT); - cmp_eq = bpf_op == BPF_JGT; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) { - emit_instr(ctx, sltu, MIPS_R_AT, dst, src); - cmp_eq = bpf_op == BPF_JGE; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else { /* JNE/JEQ case */ - cmp_eq = (bpf_op == BPF_JEQ); - } -jeq_common: - /* - * If the next insn is EXIT and we are jumping arround - * only it, invert the sense of the compare and - * conditionally jump to the exit. Poor man's branch - * chaining. - */ - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) { - target = j_target(ctx, exit_idx); - if (target == (unsigned int)-1) - return -E2BIG; - cmp_eq = !cmp_eq; - b_off = 4 * 3; - if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { - ctx->offsets[this_idx] |= OFFSETS_B_CONV; - ctx->long_b_conversion = 1; - } - } - - if (cmp_eq) - emit_instr(ctx, bne, dst, src, b_off); - else - emit_instr(ctx, beq, dst, src, b_off); - emit_instr(ctx, nop); - if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { - emit_instr(ctx, j, target); - emit_instr(ctx, nop); - } - return 2; /* We consumed the exit. 
*/ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) { - target = j_target(ctx, this_idx + insn->off + 1); - if (target == (unsigned int)-1) - return -E2BIG; - cmp_eq = !cmp_eq; - b_off = 4 * 3; - if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { - ctx->offsets[this_idx] |= OFFSETS_B_CONV; - ctx->long_b_conversion = 1; - } - } - - if (cmp_eq) - emit_instr(ctx, beq, dst, src, b_off); - else - emit_instr(ctx, bne, dst, src, b_off); - emit_instr(ctx, nop); - if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { - emit_instr(ctx, j, target); - emit_instr(ctx, nop); - } - break; - case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */ - cmp_eq = (bpf_op == BPF_JSGE); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - - if (insn->imm == 0) { - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - switch (bpf_op) { - case BPF_JSGT: - emit_instr(ctx, blez, dst, b_off); - break; - case BPF_JSGE: - emit_instr(ctx, bltz, dst, b_off); - break; - case BPF_JSLT: - emit_instr(ctx, bgez, dst, b_off); - break; - case BPF_JSLE: - emit_instr(ctx, bgtz, dst, b_off); - break; - } - emit_instr(ctx, nop); - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - switch (bpf_op) { - case BPF_JSGT: - emit_instr(ctx, bgtz, dst, b_off); - break; - case BPF_JSGE: - emit_instr(ctx, bgez, dst, b_off); - break; - case BPF_JSLT: - emit_instr(ctx, bltz, dst, b_off); - break; - case BPF_JSLE: - emit_instr(ctx, blez, dst, b_off); - break; - } - emit_instr(ctx, nop); - break; - } - /* - * only "LT" compare available, so we must use imm + 1 - * to generate "GT" and imm -1 to generate LE - */ - if (bpf_op == BPF_JSGT) - t64s = insn->imm + 1; - else if (bpf_op == BPF_JSLE) - t64s = insn->imm + 1; - else - t64s = insn->imm; - - cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE; - if (t64s >= S16_MIN && t64s <= S16_MAX) { - emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - goto jeq_common; - } - emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); - emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - goto jeq_common; - - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JLT | BPF_K: - case BPF_JMP | BPF_JLE | BPF_K: - cmp_eq = (bpf_op == BPF_JGE); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - /* - * only "LT" compare available, so we must use imm + 1 - * to generate "GT" and imm -1 to generate LE - */ - if (bpf_op == BPF_JGT) - t64s = (u64)(u32)(insn->imm) + 1; - else if (bpf_op == BPF_JLE) - t64s = (u64)(u32)(insn->imm) + 1; - else - t64s = (u64)(u32)(insn->imm); - - cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE; - - emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); - emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - goto jeq_common; - - case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - - if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) { - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - 
emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off); - emit_instr(ctx, nop); - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off); - emit_instr(ctx, nop); - break; - } - t64 = (u32)insn->imm; - emit_const_to_reg(ctx, MIPS_R_AT, t64); - emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - cmp_eq = false; - goto jeq_common; - - case BPF_JMP | BPF_JA: - /* - * Prefer relative branch for easier debugging, but - * fall back if needed. - */ - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) { - target = j_target(ctx, this_idx + insn->off + 1); - if (target == (unsigned int)-1) - return -E2BIG; - emit_instr(ctx, j, target); - } else { - emit_instr(ctx, b, b_off); - } - emit_instr(ctx, nop); - break; - case BPF_LD | BPF_DW | BPF_IMM: - if (insn->src_reg != 0) - return -EINVAL; - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32); - emit_const_to_reg(ctx, dst, t64); - return 2; /* Double slot insn */ - - case BPF_JMP | BPF_CALL: - ctx->flags |= EBPF_SAVE_RA; - t64s = (s64)insn->imm + (long)__bpf_call_base; - emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s); - emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); - /* delay slot */ - emit_instr(ctx, nop); - break; - - case BPF_JMP | BPF_TAIL_CALL: - if (emit_bpf_tail_call(ctx, this_idx)) - return -EINVAL; - break; - - case BPF_ALU | BPF_END | BPF_FROM_BE: - case BPF_ALU | BPF_END | BPF_FROM_LE: - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (insn->imm == 64 && td == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - - if (insn->imm != 64 && td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - -#ifdef __BIG_ENDIAN - need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE); -#else - need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE); -#endif - if (insn->imm == 16) { - if (need_swap) - emit_instr(ctx, wsbh, dst, dst); - emit_instr(ctx, andi, dst, dst, 0xffff); - } else if (insn->imm == 32) { - if (need_swap) { - emit_instr(ctx, wsbh, dst, dst); - emit_instr(ctx, rotr, dst, dst, 16); - } - } else { /* 64-bit*/ - if (need_swap) { - emit_instr(ctx, dsbh, dst, dst); - emit_instr(ctx, dshd, dst, dst); - } - } - break; - - case BPF_ST | BPF_NOSPEC: /* speculation barrier */ - break; - - case BPF_ST | BPF_B | BPF_MEM: - case BPF_ST | BPF_H | BPF_MEM: - case BPF_ST | BPF_W | BPF_MEM: - case BPF_ST | BPF_DW | BPF_MEM: - if (insn->dst_reg == BPF_REG_10) { - ctx->flags |= EBPF_SEEN_FP; - dst = MIPS_R_SP; - mem_off = insn->off + MAX_BPF_STACK; - } else { - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - mem_off = insn->off; - } - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - switch (BPF_SIZE(insn->code)) { - case BPF_B: - emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst); - break; - case BPF_H: - emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst); - break; - case BPF_W: - emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst); - break; - case BPF_DW: - emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst); - break; - } - break; - - case BPF_LDX | BPF_B | BPF_MEM: - case BPF_LDX | BPF_H | BPF_MEM: - case BPF_LDX | BPF_W | BPF_MEM: - case BPF_LDX | BPF_DW | BPF_MEM: - if (insn->src_reg == BPF_REG_10) { - ctx->flags |= EBPF_SEEN_FP; - src = MIPS_R_SP; - mem_off = insn->off + 
MAX_BPF_STACK; - } else { - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - if (src < 0) - return src; - mem_off = insn->off; - } - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - switch (BPF_SIZE(insn->code)) { - case BPF_B: - emit_instr(ctx, lbu, dst, mem_off, src); - break; - case BPF_H: - emit_instr(ctx, lhu, dst, mem_off, src); - break; - case BPF_W: - emit_instr(ctx, lw, dst, mem_off, src); - break; - case BPF_DW: - emit_instr(ctx, ld, dst, mem_off, src); - break; - } - break; - - case BPF_STX | BPF_B | BPF_MEM: - case BPF_STX | BPF_H | BPF_MEM: - case BPF_STX | BPF_W | BPF_MEM: - case BPF_STX | BPF_DW | BPF_MEM: - case BPF_STX | BPF_W | BPF_ATOMIC: - case BPF_STX | BPF_DW | BPF_ATOMIC: - if (insn->dst_reg == BPF_REG_10) { - ctx->flags |= EBPF_SEEN_FP; - dst = MIPS_R_SP; - mem_off = insn->off + MAX_BPF_STACK; - } else { - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - mem_off = insn->off; - } - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - if (src < 0) - return src; - if (BPF_MODE(insn->code) == BPF_ATOMIC) { - if (insn->imm != BPF_ADD) { - pr_err("ATOMIC OP %02x NOT HANDLED\n", insn->imm); - return -EINVAL; - } - - /* - * If mem_off does not fit within the 9 bit ll/sc - * instruction immediate field, use a temp reg. - */ - if (MIPS_ISA_REV >= 6 && - (mem_off >= BIT(8) || mem_off < -BIT(8))) { - emit_instr(ctx, daddiu, MIPS_R_T6, - dst, mem_off); - mem_off = 0; - dst = MIPS_R_T6; - } - switch (BPF_SIZE(insn->code)) { - case BPF_W: - if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - emit_instr(ctx, sll, MIPS_R_AT, src, 0); - src = MIPS_R_AT; - } - emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst); - emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src); - emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst); - /* - * On failure back up to LL (-4 - * instructions of 4 bytes each - */ - emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); - emit_instr(ctx, nop); - break; - case BPF_DW: - if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); - emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); - src = MIPS_R_AT; - } - emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst); - emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src); - emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst); - emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); - emit_instr(ctx, nop); - break; - } - } else { /* BPF_MEM */ - switch (BPF_SIZE(insn->code)) { - case BPF_B: - emit_instr(ctx, sb, src, mem_off, dst); - break; - case BPF_H: - emit_instr(ctx, sh, src, mem_off, dst); - break; - case BPF_W: - emit_instr(ctx, sw, src, mem_off, dst); - break; - case BPF_DW: - if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); - emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); - src = MIPS_R_AT; - } - emit_instr(ctx, sd, src, mem_off, dst); - break; - } - } - break; - - default: - pr_err("NOT HANDLED %d - (%02x)\n", - this_idx, (unsigned int)insn->code); - return -EINVAL; - } - return 1; -} - -#define RVT_VISITED_MASK 0xc000000000000000ull -#define RVT_FALL_THROUGH 0x4000000000000000ull -#define RVT_BRANCH_TAKEN 0x8000000000000000ull -#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN) - -static int build_int_body(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - const struct bpf_insn *insn; - int i, r; - - for (i = 0; i < prog->len; ) { - insn = prog->insnsi + i; - if ((ctx->reg_val_types[i] & 
RVT_VISITED_MASK) == 0) { - /* dead instruction, don't emit it. */ - i++; - continue; - } - - if (ctx->target == NULL) - ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4); - - r = build_one_insn(insn, ctx, i, prog->len); - if (r < 0) - return r; - i += r; - } - /* epilogue offset */ - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - /* - * All exits have an offset of the epilogue, some offsets may - * not have been set due to banch-around threading, so set - * them now. - */ - if (ctx->target == NULL) - for (i = 0; i < prog->len; i++) { - insn = prog->insnsi + i; - if (insn->code == (BPF_JMP | BPF_EXIT)) - ctx->offsets[i] = ctx->idx * 4; - } - return 0; -} - -/* return the last idx processed, or negative for error */ -static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt, - int start_idx, bool follow_taken) -{ - const struct bpf_prog *prog = ctx->skf; - const struct bpf_insn *insn; - u64 exit_rvt = initial_rvt; - u64 *rvt = ctx->reg_val_types; - int idx; - int reg; - - for (idx = start_idx; idx < prog->len; idx++) { - rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; - insn = prog->insnsi + idx; - switch (BPF_CLASS(insn->code)) { - case BPF_ALU: - switch (BPF_OP(insn->code)) { - case BPF_ADD: - case BPF_SUB: - case BPF_MUL: - case BPF_DIV: - case BPF_OR: - case BPF_AND: - case BPF_LSH: - case BPF_RSH: - case BPF_NEG: - case BPF_MOD: - case BPF_XOR: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - break; - case BPF_MOV: - if (BPF_SRC(insn->code)) { - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - } else { - /* IMM to REG move*/ - if (insn->imm >= 0) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - } - break; - case BPF_END: - if (insn->imm == 64) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - else if (insn->imm == 32) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - else /* insn->imm == 16 */ - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - break; - } - rvt[idx] |= RVT_DONE; - break; - case BPF_ALU64: - switch (BPF_OP(insn->code)) { - case BPF_MOV: - if (BPF_SRC(insn->code)) { - /* REG to REG move*/ - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - } else { - /* IMM to REG move*/ - if (insn->imm >= 0) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); - } - break; - default: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - } - rvt[idx] |= RVT_DONE; - break; - case BPF_LD: - switch (BPF_SIZE(insn->code)) { - case BPF_DW: - if (BPF_MODE(insn->code) == BPF_IMM) { - s64 val; - - val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); - if (val > 0 && val <= S32_MAX) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - else if (val >= S32_MIN && val <= S32_MAX) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - rvt[idx] |= RVT_DONE; - idx++; - } else { - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - } - break; - case BPF_B: - case BPF_H: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - break; - case BPF_W: - if (BPF_MODE(insn->code) == BPF_IMM) - set_reg_val_type(&exit_rvt, insn->dst_reg, - insn->imm >= 0 ? 
REG_32BIT_POS : REG_32BIT); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - break; - } - rvt[idx] |= RVT_DONE; - break; - case BPF_LDX: - switch (BPF_SIZE(insn->code)) { - case BPF_DW: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - break; - case BPF_B: - case BPF_H: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - break; - case BPF_W: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - break; - } - rvt[idx] |= RVT_DONE; - break; - case BPF_JMP: - switch (BPF_OP(insn->code)) { - case BPF_EXIT: - rvt[idx] = RVT_DONE | exit_rvt; - rvt[prog->len] = exit_rvt; - return idx; - case BPF_JA: - rvt[idx] |= RVT_DONE; - idx += insn->off; - break; - case BPF_JEQ: - case BPF_JGT: - case BPF_JGE: - case BPF_JLT: - case BPF_JLE: - case BPF_JSET: - case BPF_JNE: - case BPF_JSGT: - case BPF_JSGE: - case BPF_JSLT: - case BPF_JSLE: - if (follow_taken) { - rvt[idx] |= RVT_BRANCH_TAKEN; - idx += insn->off; - follow_taken = false; - } else { - rvt[idx] |= RVT_FALL_THROUGH; - } - break; - case BPF_CALL: - set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT); - /* Upon call return, argument registers are clobbered. */ - for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++) - set_reg_val_type(&exit_rvt, reg, REG_64BIT); - - rvt[idx] |= RVT_DONE; - break; - default: - WARN(1, "Unhandled BPF_JMP case.\n"); - rvt[idx] |= RVT_DONE; - break; - } - break; - default: - rvt[idx] |= RVT_DONE; - break; - } - } - return idx; -} - -/* - * Track the value range (i.e. 32-bit vs. 64-bit) of each register at - * each eBPF insn. This allows unneeded sign and zero extension - * operations to be omitted. - * - * Doesn't handle yet confluence of control paths with conflicting - * ranges, but it is good enough for most sane code. - */ -static int reg_val_propagate(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - u64 exit_rvt; - int reg; - int i; - - /* - * 11 registers * 3 bits/reg leaves top bits free for other - * uses. Bit-62..63 used to see if we have visited an insn. - */ - exit_rvt = 0; - - /* Upon entry, argument registers are 64-bit. */ - for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++) - set_reg_val_type(&exit_rvt, reg, REG_64BIT); - - /* - * First follow all conditional branches on the fall-through - * edge of control flow.. - */ - reg_val_propagate_range(ctx, exit_rvt, 0, false); -restart_search: - /* - * Then repeatedly find the first conditional branch where - * both edges of control flow have not been taken, and follow - * the branch taken edge. We will end up restarting the - * search once per conditional branch insn. - */ - for (i = 0; i < prog->len; i++) { - u64 rvt = ctx->reg_val_types[i]; - - if ((rvt & RVT_VISITED_MASK) == RVT_DONE || - (rvt & RVT_VISITED_MASK) == 0) - continue; - if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) { - reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true); - } else { /* RVT_BRANCH_TAKEN */ - WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n"); - reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false); - } - goto restart_search; - } - /* - * Eventually all conditional branches have been followed on - * both branches and we are done. Any insn that has not been - * visited at this point is dead. - */ - - return 0; -} - -static void jit_fill_hole(void *area, unsigned int size) -{ - u32 *p; - - /* We are guaranteed to have aligned memory. 
*/ - for (p = area; size >= sizeof(u32); size -= sizeof(u32)) - uasm_i_break(&p, BRK_BUG); /* Increments p */ -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) -{ - struct bpf_prog *orig_prog = prog; - bool tmp_blinded = false; - struct bpf_prog *tmp; - struct bpf_binary_header *header = NULL; - struct jit_ctx ctx; - unsigned int image_size; - u8 *image_ptr; - - if (!prog->jit_requested) - return prog; - - tmp = bpf_jit_blind_constants(prog); - /* If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. - */ - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } - - memset(&ctx, 0, sizeof(ctx)); - - preempt_disable(); - switch (current_cpu_type()) { - case CPU_CAVIUM_OCTEON: - case CPU_CAVIUM_OCTEON_PLUS: - case CPU_CAVIUM_OCTEON2: - case CPU_CAVIUM_OCTEON3: - ctx.use_bbit_insns = 1; - break; - default: - ctx.use_bbit_insns = 0; - } - preempt_enable(); - - ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); - if (ctx.offsets == NULL) - goto out_err; - - ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL); - if (ctx.reg_val_types == NULL) - goto out_err; - - ctx.skf = prog; - - if (reg_val_propagate(&ctx)) - goto out_err; - - /* - * First pass discovers used resources and instruction offsets - * assuming short branches are used. - */ - if (build_int_body(&ctx)) - goto out_err; - - /* - * If no calls are made (EBPF_SAVE_RA), then tail call count - * in $v1, else we must save in n$s4. - */ - if (ctx.flags & EBPF_SEEN_TC) { - if (ctx.flags & EBPF_SAVE_RA) - ctx.flags |= EBPF_SAVE_S4; - else - ctx.flags |= EBPF_TCC_IN_V1; - } - - /* - * Second pass generates offsets, if any branches are out of - * range a jump-around long sequence is generated, and we have - * to try again from the beginning to generate the new - * offsets. This is done until no additional conversions are - * necessary. - */ - do { - ctx.idx = 0; - ctx.gen_b_offsets = 1; - ctx.long_b_conversion = 0; - if (gen_int_prologue(&ctx)) - goto out_err; - if (build_int_body(&ctx)) - goto out_err; - if (build_int_epilogue(&ctx, MIPS_R_RA)) - goto out_err; - } while (ctx.long_b_conversion); - - image_size = 4 * ctx.idx; - - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) - goto out_err; - - ctx.target = (u32 *)image_ptr; - - /* Third pass generates the code */ - ctx.idx = 0; - if (gen_int_prologue(&ctx)) - goto out_err; - if (build_int_body(&ctx)) - goto out_err; - if (build_int_epilogue(&ctx, MIPS_R_RA)) - goto out_err; - - /* Update the icache */ - flush_icache_range((unsigned long)ctx.target, - (unsigned long)&ctx.target[ctx.idx]); - - if (bpf_jit_enable > 1) - /* Dump JIT code */ - bpf_jit_dump(prog->len, image_size, 2, ctx.target); - - bpf_jit_binary_lock_ro(header); - prog->bpf_func = (void *)ctx.target; - prog->jited = 1; - prog->jited_len = image_size; -out_normal: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); - kfree(ctx.offsets); - kfree(ctx.reg_val_types); - - return prog; - -out_err: - prog = orig_prog; - if (header) - bpf_jit_binary_free(header); - goto out_normal; -} From 90982e13561e0d8df91d49658d3bf068ae9f2dff Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 6 Oct 2021 16:08:25 +0200 Subject: [PATCH 022/181] bpf, arm: Remove dummy bpf_jit_compile stub The BPF core defines a __weak bpf_jit_compile() dummy function already which should only be overridden by JITs if they actually implement a legacy cBPF JIT. Given arm implements an eBPF JIT, this stub is not needed. Now that MIPS cBPF JIT is finally gone, the only JIT left that is still implementing bpf_jit_compile() is the sparc32 one. Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko --- arch/arm/net/bpf_jit_32.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a951276f05475..ce75c6b2e7510 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1863,11 +1863,6 @@ static int validate_code(struct jit_ctx *ctx) return 0; } -void bpf_jit_compile(struct bpf_prog *prog) -{ - /* Nothing to do here. We support Internal BPF. */ -} - bool bpf_jit_needs_zext(void) { return true; From 929bef467771d4d5a22b9edb51a2025dc0e49113 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 6 Oct 2021 12:10:49 +0100 Subject: [PATCH 023/181] bpf: Use $(pound) instead of \# in Makefiles Recent-ish versions of make do no longer consider number signs ("#") as comment symbols when they are inserted inside of a macro reference or in a function invocation. In such cases, the symbols should not be escaped. There are a few occurrences of "\#" in libbpf's and samples' Makefiles. In the former, the backslash is harmless, because grep associates no particular meaning to the escaped symbol and reads it as a regular "#". In samples' Makefile, recent versions of make will pass the backslash down to the compiler, making the probe fail all the time and resulting in the display of a warning about "make headers_install" being required, even after headers have been installed. A similar issue has been addressed at some other locations by commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files for future Make"). Let's address it for libbpf's and samples' Makefiles in the same fashion, by using a "$(pound)" variable (pulled from tools/scripts/Makefile.include for libbpf, or re-defined for the samples). 
Reference for the change in make: https://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff05b57 Fixes: 2f3830412786 ("libbpf: Make libbpf_version.h non-auto-generated") Fixes: 07c3bbdb1a9b ("samples: bpf: print a warning about headers_install") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006111049.20708-1-quentin@isovalent.com --- samples/bpf/Makefile | 4 +++- tools/lib/bpf/Makefile | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4dc20be5fb96a..a5783749ec150 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -3,6 +3,8 @@ BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools +pound := \# + # List of programs to build tprogs-y := test_lru_dist tprogs-y += sock_example @@ -232,7 +234,7 @@ endif # Don't evaluate probes and warnings if we need to run make recursively ifneq ($(src),) -HDR_PROBE := $(shell printf "\#include \n struct list_head { int a; }; int main() { return 0; }" | \ +HDR_PROBE := $(shell printf "$(pound)include \n struct list_head { int a; }; int main() { return 0; }" | \ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \ -o /dev/null 2>/dev/null && echo okay) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 0f766345506f2..41e4f78dbad5b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -208,8 +208,8 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) exit 1; \ fi -HDR_MAJ_VERSION := $(shell grep -oE '^\#define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) -HDR_MIN_VERSION := $(shell grep -oE '^\#define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) +HDR_MAJ_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) +HDR_MIN_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) check_version: $(VERSION_SCRIPT) libbpf_version.h @if [ "$(HDR_MAJ_VERSION)" != "$(LIBBPF_MAJOR_VERSION)" ]; then \ From 189c83bdde850e1fc8bb347f813cdd8776ce7abf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 3 Oct 2021 18:49:25 +0200 Subject: [PATCH 024/181] selftest/bpf: Switch recursion test to use htab_map_delete_elem Currently the recursion test is hooking __htab_map_lookup_elem function, which is invoked both from bpf_prog and bpf syscall. But in our kernel build, the __htab_map_lookup_elem gets inlined within the htab_map_lookup_elem, so it's not triggered and the test fails. Fixing this by using htab_map_delete_elem, which is not inlined for bpf_prog calls (like htab_map_lookup_elem is) and is used directly as pointer for map_delete_elem, so it won't disappear by inlining.
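A minimal sketch of the attach pattern the test now relies on (it mirrors the selftest change below; the map name, includes and license string here are only illustrative):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 1);
          __type(key, int);
          __type(value, long);
  } hash SEC(".maps");

  SEC("fentry/htab_map_delete_elem")
  int BPF_PROG(on_delete, struct bpf_map *map)
  {
          int key = 0;

          if (map == (void *)&hash)
                  /* calls back into htab_map_delete_elem; the recursion
                   * counter keeps this program from re-entering itself
                   */
                  bpf_map_delete_elem(&hash, &key);
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";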
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/YVnfFTL/3T6jOwHI@krava --- tools/testing/selftests/bpf/prog_tests/recursion.c | 10 +++++----- tools/testing/selftests/bpf/progs/recursion.c | 9 +++------ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c index 0e378d63fe183..f3af2627b5997 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursion.c +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -20,18 +20,18 @@ void test_recursion(void) goto out; ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key); ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key); ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2"); ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); - err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup), + err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete), &prog_info, &prog_info_len); if (!ASSERT_OK(err, "get_prog_info")) goto out; diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c index 49f679375b9d6..3c2423bb19e28 100644 --- a/tools/testing/selftests/bpf/progs/recursion.c +++ b/tools/testing/selftests/bpf/progs/recursion.c @@ -24,8 +24,8 @@ struct { int pass1 = 0; int pass2 = 0; -SEC("fentry/__htab_map_lookup_elem") -int BPF_PROG(on_lookup, struct bpf_map *map) +SEC("fentry/htab_map_delete_elem") +int BPF_PROG(on_delete, struct bpf_map *map) { int key = 0; @@ -35,10 +35,7 @@ int BPF_PROG(on_lookup, struct bpf_map *map) } if (map == (void *)&hash2) { pass2++; - /* htab_map_gen_lookup() will inline below call - * into direct call to __htab_map_lookup_elem() - */ - bpf_map_lookup_elem(&hash2, &key); + bpf_map_delete_elem(&hash2, &key); return 0; } From 2088a3a71d870115fdfb799c0f7de76d7383ba03 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Mon, 4 Oct 2021 00:58:43 +0800 Subject: [PATCH 025/181] libbpf: Deprecate bpf_{map,program}__{prev,next} APIs since v0.7 Deprecate bpf_{map,program}__{prev,next} APIs. Replace them with a new set of APIs named bpf_object__{prev,next}_{program,map} which follow the libbpf API naming convention ([0]). No functionality changes. 
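As a usage sketch (assuming an already opened struct bpf_object *obj; error handling omitted), the migration is mechanical, with the object moving to the first argument:

  struct bpf_program *prog;
  struct bpf_map *map;

  /* deprecated spellings */
  prog = bpf_program__next(NULL, obj);
  map = bpf_map__next(NULL, obj);

  /* new spellings following the bpf_object__ naming convention */
  prog = bpf_object__next_program(obj, NULL);
  map = bpf_object__next_map(obj, NULL);

  /* the iteration macros keep their signature and now expand to the
   * new functions
   */
  bpf_object__for_each_program(prog, obj) {
          /* ... use prog ... */
  }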
[0] Closes: https://github.com/libbpf/libbpf/issues/296 Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211003165844.4054931-2-hengqi.chen@gmail.com --- tools/lib/bpf/libbpf.c | 24 ++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 35 +++++++++++++++++++++++------------ tools/lib/bpf/libbpf.map | 4 ++++ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b90878a315f4..ed313fd491bd2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7834,6 +7834,12 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, struct bpf_program * bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) +{ + return bpf_object__next_program(obj, prev); +} + +struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { struct bpf_program *prog = prev; @@ -7846,6 +7852,12 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) struct bpf_program * bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) +{ + return bpf_object__prev_program(obj, next); +} + +struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) { struct bpf_program *prog = next; @@ -8778,6 +8790,12 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) +{ + return bpf_object__next_map(obj, prev); +} + +struct bpf_map * +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { if (prev == NULL) return obj->maps; @@ -8787,6 +8805,12 @@ bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) struct bpf_map * bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) +{ + return bpf_object__prev_map(obj, next); +} + +struct bpf_map * +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { if (next == NULL) { if (!obj->nr_maps) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index df10d14dbbb88..89ca9c83ed4ed 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -190,16 +190,22 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") +struct bpf_program *bpf_program__next(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); -#define bpf_object__for_each_program(pos, obj) \ - for ((pos) = bpf_program__next(NULL, (obj)); \ - (pos) != NULL; \ - (pos) = bpf_program__next((pos), (obj))) +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_object__next_program((obj), NULL); \ + (pos) != NULL; \ + (pos) = bpf_object__next_program((obj), (pos))) -LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") +struct bpf_program *bpf_program__prev(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); 
typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -503,16 +509,21 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") +struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); + #define bpf_object__for_each_map(pos, obj) \ - for ((pos) = bpf_map__next(NULL, (obj)); \ + for ((pos) = bpf_object__next_map((obj), NULL); \ (pos) != NULL; \ - (pos) = bpf_map__next((pos), (obj))) + (pos) = bpf_object__next_map((obj), (pos))) #define bpf_map__for_each bpf_object__for_each_map +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") +struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); /** * @brief **bpf_map__fd()** gets the file descriptor of the passed diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f6b0db1e8c8b4..f270d25e4af3f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -389,6 +389,10 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: + bpf_object__next_map; + bpf_object__next_program; + bpf_object__prev_map; + bpf_object__prev_program; btf__add_btf; btf__add_tag; } LIBBPF_0.5.0; From 9330303446382a33aa62a8a88a7a48555f76df0b Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Mon, 4 Oct 2021 17:56:44 -0400 Subject: [PATCH 026/181] libbpf: Add API documentation convention guidelines This adds a section to the documentation for libbpf naming convention which describes how to document API features in libbpf, specifically the format of which API doc comments need to conform to. Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211004215644.497327-1-grantseltzer@gmail.com --- .../bpf/libbpf/libbpf_naming_convention.rst | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst index 9c68d5014ff1b..f86360f734a84 100644 --- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst +++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst @@ -150,6 +150,46 @@ mirror of the mainline's version of libbpf for a stand-alone build. However, all changes to libbpf's code base must be upstreamed through the mainline kernel tree. + +API documentation convention +============================ + +The libbpf API is documented via comments above definitions in +header files. These comments can be rendered by doxygen and sphinx +for well organized html output. This section describes the +convention in which these comments should be formated. + +Here is an example from btf.h: + +.. 
code-block:: c + + /** + * @brief **btf__new()** creates a new instance of a BTF object from the raw + * bytes of an ELF's BTF section + * @param data raw bytes + * @param size number of bytes passed in `data` + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ + +The comment must start with a block comment of the form '/\*\*'. + +The documentation always starts with a @brief directive. This line is a short +description about this API. It starts with the name of the API, denoted in bold +like so: **api_name**. Please include an open and close parenthesis if this is a +function. Follow with the short description of the API. A longer form description +can be added below the last directive, at the bottom of the comment. + +Parameters are denoted with the @param directive, there should be one for each +parameter. If this is a function with a non-void return, use the @return directive +to document it. + License ------------------- From 6f2b219b62a4376ca2da15c503de79d0650c8155 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Mon, 4 Oct 2021 00:58:44 +0800 Subject: [PATCH 027/181] selftests/bpf: Switch to new bpf_object__next_{map,program} APIs Replace deprecated bpf_{map,program}__next APIs with newly added bpf_object__next_{map,program} APIs, so that no compilation warnings emit. Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211003165844.4054931-3-hengqi.chen@gmail.com --- samples/bpf/xdp1_user.c | 2 +- samples/bpf/xdp_sample_pkts_user.c | 2 +- tools/bpf/bpftool/iter.c | 2 +- tools/bpf/bpftool/prog.c | 2 +- tools/testing/selftests/bpf/prog_tests/btf.c | 2 +- tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 6 +++--- tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 2 +- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 2 +- tools/testing/selftests/bpf/xdping.c | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 116e39f6b6666..8675fa5273df8 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -128,7 +128,7 @@ int main(int argc, char **argv) if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (!map) { printf("finding a map in obj file failed\n"); return 1; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 495e09897bd3d..f4382ccdcbb18 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -154,7 +154,7 @@ int main(int argc, char **argv) return 1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (!map) { printf("finding a map in obj file failed\n"); return 1; diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 84a9b01d956dc..6c0de647b8ad5 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -57,7 +57,7 @@ static int do_pin(int argc, char **argv) goto close_obj; } - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (!prog) { p_err("can't find bpf program in objfile 
%s", objfile); goto close_obj; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9c3e343b7d872..a24ea7e26aa43 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1601,7 +1601,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; if (first_prog_only) { - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (!prog) { p_err("object file doesn't contain any bpf program"); goto err_close_obj; diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 9c85d7d27409b..acd33d0cd5d97 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4511,7 +4511,7 @@ static void do_test_file(unsigned int test_num) if (CHECK(err, "obj: %d", err)) return; - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (CHECK(!prog, "Cannot find bpf_prog")) { err = -1; goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index c7c1816899bf6..2839f4270a26e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -285,7 +285,7 @@ static void test_fmod_ret_freplace(void) if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open")) goto out; - prog = bpf_program__next(NULL, freplace_obj); + prog = bpf_object__next_program(freplace_obj, NULL); err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "freplace__set_attach_target"); @@ -302,7 +302,7 @@ static void test_fmod_ret_freplace(void) goto out; attach_prog_fd = bpf_program__fd(prog); - prog = bpf_program__next(NULL, fmod_obj); + prog = bpf_object__next_program(fmod_obj, NULL); err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL); ASSERT_OK(err, "fmod_ret_set_attach_target"); @@ -352,7 +352,7 @@ static void test_obj_load_failure_common(const char *obj_file, if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "set_attach_target"); diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 4efd337d6a3c4..d40e9156c48d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -114,7 +114,7 @@ static int prepare_bpf_obj(void) err = bpf_object__load(obj); RET_ERR(err, "load bpf_object", "err:%d\n", err); - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); RET_ERR(select_by_skb_data_prog < 0, "get prog fd", diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index d207e968e6b1b..265b4fe33ec33 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -109,7 +109,7 @@ static int run_test(int cgroup_fd, int server_fd) return -1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); map_fd = bpf_map__fd(map); err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c 
index 79a3453dab255..30f12637f4e4a 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -187,7 +187,7 @@ int main(int argc, char **argv) return 1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (map) map_fd = bpf_map__fd(map); if (!map || map_fd < 0) { From 4a404a7e8a3902fc560527241a611186605efb4e Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sun, 3 Oct 2021 00:10:00 +0800 Subject: [PATCH 028/181] libbpf: Deprecate bpf_object__unload() API since v0.6 BPF objects are not reloadable after unload. Users are expected to use bpf_object__close() to unload and free up resources in one operation. No need to expose bpf_object__unload() as a public API, deprecate it ([0]). Add bpf_object__unload() as an alias to internal bpf_object_unload() and replace all bpf_object__unload() uses to avoid compilation errors. [0] Closes: https://github.com/libbpf/libbpf/issues/290 Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211002161000.3854559-1-hengqi.chen@gmail.com --- tools/lib/bpf/libbpf.c | 8 +++++--- tools/lib/bpf/libbpf.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f32fa51b1e63e..4b90878a315f4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6672,7 +6672,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); } -int bpf_object__unload(struct bpf_object *obj) +static int bpf_object_unload(struct bpf_object *obj) { size_t i; @@ -6691,6 +6691,8 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); + static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -7089,7 +7091,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) if (obj->maps[i].pinned && !obj->maps[i].reused) bpf_map__unpin(&obj->maps[i], NULL); - bpf_object__unload(obj); + bpf_object_unload(obj); pr_warn("failed to load object '%s'\n", obj->path); return libbpf_err(err); } @@ -7698,7 +7700,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); - bpf_object__unload(obj); + bpf_object_unload(obj); btf__free(obj->btf); btf_ext__free(obj->btf_ext); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e35490c54eb3a..df10d14dbbb88 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -150,6 +150,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); +LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); From 6364d7d75a0e015a405d1f8a07f267f076c36ca6 Mon Sep 17 00:00:00 2001 From: Jie Meng Date: Wed, 6 Oct 2021 12:41:35 -0700 Subject: [PATCH 029/181] bpf, x64: Factor out emission of REX byte in more cases Introduce a single reg version of maybe_emit_mod() and factor out common code in more cases. 
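As a rough standalone sketch (plain C rather than the JIT's EMIT macros, and not the kernel code itself), the rule that the new single-register helper centralizes is the usual x86-64 REX convention: emit REX.W (base 0x48) when the operand size is 64 bits, emit a bare REX byte (base 0x40) only when an extended register r8-r15 is involved, and emit no prefix otherwise.

    #include <stdbool.h>

    /* Return the REX prefix byte for a one-register instruction, or 0 when no
     * prefix is needed. "is_ext" plays the role of is_ereg() in the JIT. */
    static int rex_prefix_1reg(bool is64, bool is_ext)
    {
            if (is64)
                    return 0x48 | (is_ext ? 1 : 0); /* REX.W, plus REX.B for r8-r15 */
            if (is_ext)
                    return 0x41;                    /* bare REX.B only */
            return 0;
    }

For instance, "neg rax" encodes as 48 F7 D8, "neg eax" as plain F7 D8 with no prefix, and "neg r8d" as 41 F7 D8, which is the same 0x48 / 0x40 / nothing decision that each of the open-coded call sites replaced below used to repeat.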
Signed-off-by: Jie Meng Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211006194135.608932-1-jmeng@fb.com --- arch/x86/net/bpf_jit_comp.c | 67 +++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5a0edea3cc2e4..e474718d152b6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -721,6 +721,20 @@ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) *pprog = prog; } +/* + * Similar version of maybe_emit_mod() for a single register + */ +static void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64) +{ + u8 *prog = *pprog; + + if (is64) + EMIT1(add_1mod(0x48, reg)); + else if (is_ereg(reg)) + EMIT1(add_1mod(0x40, reg)); + *pprog = prog; +} + /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -951,10 +965,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* neg dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); EMIT2(0xF7, add_1reg(0xD8, dst_reg)); break; @@ -968,10 +980,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_AND | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); /* * b3 holds 'normal' opcode, b2 short form only valid @@ -1059,11 +1069,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ EMIT2(0x31, 0xd2); - if (is64) - EMIT1(add_1mod(0x48, src_reg)); - else if (is_ereg(src_reg)) - EMIT1(add_1mod(0x40, src_reg)); /* div src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); EMIT2(0xF7, add_1reg(0xF0, src_reg)); if (BPF_OP(insn->code) == BPF_MOD && @@ -1084,10 +1091,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, dst_reg, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + maybe_emit_mod(&prog, dst_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); if (is_imm8(imm32)) /* imul dst_reg, dst_reg, imm8 */ @@ -1102,10 +1107,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, src_reg, dst_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, src_reg, dst_reg)); + maybe_emit_mod(&prog, src_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); /* imul dst_reg, src_reg */ EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg)); @@ -1118,10 +1121,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); b3 = 
simple_alu_opcodes[BPF_OP(insn->code)]; if (imm32 == 1) @@ -1152,10 +1153,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); @@ -1465,10 +1464,8 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: /* test dst_reg, imm32 */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; @@ -1501,10 +1498,8 @@ st: if (is_imm8(insn->off)) } /* cmp dst_reg, imm8/32 */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); if (is_imm8(imm32)) EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); From e5c15a363de6f87d5aff9a2674f77c49f70a9ca2 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Thu, 7 Oct 2021 16:23:39 +0200 Subject: [PATCH 030/181] mips, bpf: Fix Makefile that referenced a removed file This patch removes a stale Makefile reference to the cBPF JIT that was removed. Fixes: ebcbacfa50ec ("mips, bpf: Remove old BPF JIT implementations") Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211007142339.633899-1-johan.almbladh@anyfinetworks.com --- arch/mips/net/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 602bf242b13fd..95e826781dbc5 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code -obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o ifeq ($(CONFIG_32BIT),y) From bbf731b3f44d512efaec065435f3efd0cbdac68e Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Thu, 7 Oct 2021 16:28:28 +0200 Subject: [PATCH 031/181] mips, bpf: Optimize loading of 64-bit constants This patch shaves off a few instructions when loading sparse 64-bit constants to register. The change is covered by additional tests in lib/test_bpf.c. 
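As a rough worked example (not taken from the patch): for a sparse constant such as 0x0001000000000001, the old loop emitted "ori dst, zero, 0x1", then one "dsll dst, dst, 16" for each of the three remaining halfwords, then "ori dst, dst, 0x1", five instructions in total, with the two all-zero halfwords each costing a shift. With the accumulated shift, the same load becomes "ori dst, zero, 0x1", a single combined 48-bit shift, and "ori dst, dst, 0x1", three instructions in total, assuming dsll_safe expands a shift amount of 32 or more into one dsll32.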
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211007142828.634182-1-johan.almbladh@anyfinetworks.com --- arch/mips/net/bpf_jit_comp64.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c index 1f1f7b87f213e..815ade7242278 100644 --- a/arch/mips/net/bpf_jit_comp64.c +++ b/arch/mips/net/bpf_jit_comp64.c @@ -131,19 +131,25 @@ static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64) emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff); } else { u8 acc = MIPS_R_ZERO; + int shift = 0; int k; for (k = 0; k < 4; k++) { u16 half = imm64 >> (48 - 16 * k); if (acc == dst) - emit(ctx, dsll, dst, dst, 16); + shift += 16; if (half) { + if (shift) + emit(ctx, dsll_safe, dst, dst, shift); emit(ctx, ori, dst, acc, half); acc = dst; + shift = 0; } } + if (shift) + emit(ctx, dsll_safe, dst, dst, shift); } clobber_reg(ctx, dst); } From 0eb4ef88c53f7169c44b1bd2ace5389981409a60 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Thu, 7 Oct 2021 16:30:06 +0200 Subject: [PATCH 032/181] bpf, tests: Add more LD_IMM64 tests This patch adds new tests for the two-instruction LD_IMM64. The new tests verify the operation with immediate values of different byte patterns. Mainly intended to cover JITs that want to be clever when loading 64-bit constants. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211007143006.634308-1-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index b9fc330fc83b0..e5b10fdefab54 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2134,7 +2134,7 @@ static int bpf_fill_atomic32_cmpxchg_reg_pairs(struct bpf_test *self) * of the immediate value. This is often the case if the native instruction * immediate field width is narrower than 32 bits. */ -static int bpf_fill_ld_imm64(struct bpf_test *self) +static int bpf_fill_ld_imm64_magn(struct bpf_test *self) { int block = 64; /* Increase for more tests per MSB position */ int len = 3 + 8 * 63 * block * 2; @@ -2180,6 +2180,88 @@ static int bpf_fill_ld_imm64(struct bpf_test *self) return 0; } +/* + * Test the two-instruction 64-bit immediate load operation for different + * combinations of bytes. Each byte in the 64-bit word is constructed as + * (base & mask) | (rand() & ~mask), where rand() is a deterministic LCG. + * All patterns (base1, mask1) and (base2, mask2) bytes are tested. 
+ */ +static int __bpf_fill_ld_imm64_bytes(struct bpf_test *self, + u8 base1, u8 mask1, + u8 base2, u8 mask2) +{ + struct bpf_insn *insn; + int len = 3 + 8 * BIT(8); + int pattern, index; + u32 rand = 1; + int i = 0; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); + + for (pattern = 0; pattern < BIT(8); pattern++) { + u64 imm = 0; + + for (index = 0; index < 8; index++) { + int byte; + + if (pattern & BIT(index)) + byte = (base1 & mask1) | (rand & ~mask1); + else + byte = (base2 & mask2) | (rand & ~mask2); + imm = (imm << 8) | byte; + } + + /* Update our LCG */ + rand = rand * 1664525 + 1013904223; + + /* Perform operation */ + i += __bpf_ld_imm64(&insn[i], R1, imm); + + /* Load reference */ + insn[i++] = BPF_ALU32_IMM(BPF_MOV, R2, imm); + insn[i++] = BPF_ALU32_IMM(BPF_MOV, R3, (u32)(imm >> 32)); + insn[i++] = BPF_ALU64_IMM(BPF_LSH, R3, 32); + insn[i++] = BPF_ALU64_REG(BPF_OR, R2, R3); + + /* Check result */ + insn[i++] = BPF_JMP_REG(BPF_JEQ, R1, R2, 1); + insn[i++] = BPF_EXIT_INSN(); + } + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); + insn[i++] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + BUG_ON(i != len); + + return 0; +} + +static int bpf_fill_ld_imm64_checker(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 0, 0xff, 0xff, 0xff); +} + +static int bpf_fill_ld_imm64_pos_neg(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 1, 0x81, 0x80, 0x80); +} + +static int bpf_fill_ld_imm64_pos_zero(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 1, 0x81, 0, 0xff); +} + +static int bpf_fill_ld_imm64_neg_zero(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 0x80, 0x80, 0, 0xff); +} + /* * Exhaustive tests of JMP operations for all combinations of power-of-two * magnitudes of the operands, both for positive and negative values. The @@ -12401,14 +12483,46 @@ static struct bpf_test tests[] = { .fill_helper = bpf_fill_alu32_mod_reg, .nr_testruns = NR_PATTERN_RUNS, }, - /* LD_IMM64 immediate magnitudes */ + /* LD_IMM64 immediate magnitudes and byte patterns */ { "LD_IMM64: all immediate value magnitudes", { }, INTERNAL | FLAG_NO_DATA, { }, { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64, + .fill_helper = bpf_fill_ld_imm64_magn, + }, + { + "LD_IMM64: checker byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_checker, + }, + { + "LD_IMM64: random positive and zero byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_pos_zero, + }, + { + "LD_IMM64: random negative and zero byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_neg_zero, + }, + { + "LD_IMM64: random positive and negative byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_pos_neg, }, /* 64-bit ATOMIC register combinations */ { From aa67fdb4643616f04cb59b6d090010c371ab1a80 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 6 Oct 2021 22:02:31 -0700 Subject: [PATCH 033/181] selftests/bpf: Skip the second half of get_branch_snapshot in vm VMs running on upstream 5.12+ kernel support LBR. However, bpf_get_branch_snapshot couldn't stop the LBR before too many entries are flushed. Skip the hit/waste test for VMs before we find a proper fix for LBR in VM. 
Fixes: 025bd7c753aa ("selftests/bpf: Add test for bpf_get_branch_snapshot") Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007050231.728496-1-songliubraving@fb.com --- .../bpf/prog_tests/get_branch_snapshot.c | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c index 67e86f8d86775..e4f92feb7b32c 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -6,6 +6,30 @@ static int *pfd_array; static int cpu_cnt; +static bool is_hypervisor(void) +{ + char *line = NULL; + bool ret = false; + size_t len; + FILE *fp; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + return false; + + while (getline(&line, &len, fp) != -1) { + if (!strncmp(line, "flags", 5)) { + if (strstr(line, "hypervisor") != NULL) + ret = true; + break; + } + } + + free(line); + fclose(fp); + return ret; +} + static int create_perf_events(void) { struct perf_event_attr attr = {0}; @@ -83,6 +107,16 @@ void test_get_branch_snapshot(void) goto cleanup; } + if (is_hypervisor()) { + /* As of today, LBR in hypervisor cannot be stopped before + * too many entries are flushed. Skip the hit/waste test + * for now in hypervisor until we optimize the LBR in + * hypervisor. + */ + test__skip(); + goto cleanup; + } + ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr"); /* Given we stop LBR in software, we will waste a few entries. From dd65acf72d0e073970459d5da80573a04304aaa9 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 7 Oct 2021 16:12:34 -0700 Subject: [PATCH 034/181] selftests/bpf: Remove SEC("version") from test progs Since commit 6c4fc209fcf9d ("bpf: remove useless version check for prog load") these "version" sections, which result in bpf_attr.kern_version being set, have been unnecessary. Remove them so that it's obvious to folks using selftests as a guide that "modern" BPF progs don't need this section. 
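For anyone using these selftests as a starting point, a minimal "modern" program therefore carries only the section and license annotations. A sketch with an assumed tracepoint hook, not copied from any particular selftest:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* No SEC("version") variable: it has been unnecessary since commit
     * 6c4fc209fcf9d removed the kernel version check at load time. */
    SEC("tracepoint/sched/sched_switch")
    int handle_switch(void *ctx)
    {
            return 0;
    }

    char _license[] SEC("license") = "GPL";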
Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007231234.2223081-1-davemarchevsky@fb.com --- tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c | 1 - tools/testing/selftests/bpf/progs/connect4_prog.c | 2 -- tools/testing/selftests/bpf/progs/connect6_prog.c | 2 -- tools/testing/selftests/bpf/progs/connect_force_port4.c | 1 - tools/testing/selftests/bpf/progs/connect_force_port6.c | 1 - tools/testing/selftests/bpf/progs/dev_cgroup.c | 1 - tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c | 1 - tools/testing/selftests/bpf/progs/map_ptr_kern.c | 1 - tools/testing/selftests/bpf/progs/netcnt_prog.c | 1 - tools/testing/selftests/bpf/progs/sendmsg4_prog.c | 2 -- tools/testing/selftests/bpf/progs/sendmsg6_prog.c | 2 -- tools/testing/selftests/bpf/progs/sockmap_parse_prog.c | 2 -- tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c | 2 -- tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c | 2 -- tools/testing/selftests/bpf/progs/sockopt_inherit.c | 1 - tools/testing/selftests/bpf/progs/tcp_rtt.c | 1 - tools/testing/selftests/bpf/progs/test_btf_haskv.c | 2 -- tools/testing/selftests/bpf/progs/test_btf_newkv.c | 2 -- tools/testing/selftests/bpf/progs/test_btf_nokv.c | 2 -- tools/testing/selftests/bpf/progs/test_l4lb.c | 2 -- tools/testing/selftests/bpf/progs/test_map_in_map.c | 1 - tools/testing/selftests/bpf/progs/test_pinning.c | 2 -- tools/testing/selftests/bpf/progs/test_pinning_invalid.c | 2 -- tools/testing/selftests/bpf/progs/test_pkt_access.c | 1 - tools/testing/selftests/bpf/progs/test_queue_stack_map.h | 2 -- tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c | 2 -- tools/testing/selftests/bpf/progs/test_sk_lookup.c | 1 - tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c | 2 -- tools/testing/selftests/bpf/progs/test_skb_ctx.c | 1 - tools/testing/selftests/bpf/progs/test_sockmap_kern.h | 1 - tools/testing/selftests/bpf/progs/test_sockmap_listen.c | 1 - tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c | 1 - tools/testing/selftests/bpf/progs/test_tcp_estats.c | 1 - tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c | 1 - tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c | 2 -- tools/testing/selftests/bpf/progs/test_tracepoint.c | 1 - tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 2 -- tools/testing/selftests/bpf/progs/test_xdp.c | 2 -- tools/testing/selftests/bpf/progs/test_xdp_loop.c | 2 -- tools/testing/selftests/bpf/progs/test_xdp_redirect.c | 2 -- 40 files changed, 61 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c index 3f757e30d7a06..88638315c582c 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -14,7 +14,6 @@ #include #include -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; __u16 g_serv_port = 0; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index a943d394fd3a0..b241932911dbb 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -31,8 +31,6 @@ #define IFNAMSIZ 16 #endif -int _version SEC("version") = 1; - __attribute__ ((noinline)) int do_bind(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 
506d0f81a3756..40266d2c737c2 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -24,8 +24,6 @@ #define DST_REWRITE_PORT6 6666 -int _version SEC("version") = 1; - SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c index a979aaef2a764..27a632dd382e0 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port4.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -13,7 +13,6 @@ #include char _license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; struct svc_addr { __be32 addr; diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c index afc8f1c5a9d60..19cad93e612fc 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port6.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -12,7 +12,6 @@ #include char _license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; struct svc_addr { __be32 addr[4]; diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index 8924e06bdef06..79b54a4fa2446 100644 --- a/tools/testing/selftests/bpf/progs/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -57,4 +57,3 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 6b42db2fe391a..68587b1de34e2 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -37,4 +37,3 @@ int trace(void *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index d1d304c980f0c..b1b711d9b214d 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -683,5 +683,4 @@ int cg_skb(void *ctx) return 1; } -__u32 _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 43649bce4c540..f718b2c212dc8 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -68,4 +68,3 @@ int bpf_nextcnt(struct __sk_buff *skb) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index ac5abc34cde83..ea75a44cb7fce 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -18,8 +18,6 @@ #define DST_PORT 4040 #define DST_REWRITE_PORT4 4444 -int _version SEC("version") = 1; - SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index 24694b1a8d82b..bf9b46b806f6a 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ 
b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -22,8 +22,6 @@ #define DST_REWRITE_PORT6 6666 -int _version SEC("version") = 1; - SEC("cgroup/sendmsg6") int sendmsg_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index ca283af80d4e9..95d5b941bc1f8 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -2,8 +2,6 @@ #include #include -int _version SEC("version") = 1; - SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index eeaf6e75c9a26..80632954c5a16 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -3,8 +3,6 @@ #include #include -int _version SEC("version") = 1; - SEC("sk_msg1") int bpf_prog1(struct sk_msg_md *msg) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 73872c535cbb0..e2468a6d01a59 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -2,8 +2,6 @@ #include #include -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 20); diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index c6d428a8d7857..9fb241b972919 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -3,7 +3,6 @@ #include char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; #define SOL_CUSTOM 0xdeadbeef #define CUSTOM_INHERIT1 0 diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 0cb3204ddb182..0988d79f15877 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -3,7 +3,6 @@ #include char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; struct tcp_rtt_storage { __u32 invoked; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 31538c9ed1939..160ead6c67b20 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -4,8 +4,6 @@ #include #include "bpf_legacy.h" -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 6c5560162746b..1884a5bd10f56 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -4,8 +4,6 @@ #include #include "bpf_legacy.h" -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 506da7fd2da23..15e0f9945fe46 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -3,8 +3,6 @@ #include #include -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git 
a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 33493911d87a4..04fee08863cb1 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -21,8 +21,6 @@ #include "test_iptunnel_common.h" #include -int _version SEC("version") = 1; - static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index a6d91932dcd51..f416032ba858b 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -47,5 +47,4 @@ int xdp_mimtest0(struct xdp_md *ctx) return XDP_PASS; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c index 4ef2630292b26..0facea6cbbaed 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning.c +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -3,8 +3,6 @@ #include #include -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c index 5412e0c732c76..2a56db1094b87 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c +++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c @@ -3,8 +3,6 @@ #include #include -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 3cfd88141ddc1..0558544e1ff0f 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -15,7 +15,6 @@ #include #define barrier() __asm__ __volatile__("": : :"memory") -int _version SEC("version") = 1; /* llvm will optimize both subprograms into exactly the same BPF assembly * diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h index 4dd9806ad73b5..0fcd3ff0e38a2 100644 --- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h +++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h @@ -8,8 +8,6 @@ #include #include -int _version SEC("version") = 1; - struct { __uint(type, MAP_TYPE); __uint(max_entries, 32); diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 0f9bc258225ec..7d56ed47cd4d8 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -15,8 +15,6 @@ #include #include "test_select_reuseport_common.h" -int _version SEC("version") = 1; - #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 48534d8103918..19d2465d94425 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -644,4 +644,3 @@ int multi_prog_redir2(struct bpf_sk_lookup *ctx) } char _license[] SEC("license") = "Dual BSD/GPL"; -__u32 _version SEC("version") = 1; diff 
--git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index 552f2090665c5..c304cd5b8cad7 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -42,6 +42,4 @@ int log_cgroup_id(struct __sk_buff *skb) return TC_ACT_OK; } -int _version SEC("version") = 1; - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index ba4dab09d19c4..1d61b36e6067e 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -3,7 +3,6 @@ #include #include -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; SEC("skb_ctx") diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 1858435de7aaf..2966564b8497a 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -361,5 +361,4 @@ int bpf_prog10(struct sk_msg_md *msg) return SK_DROP; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index 00f1456aaeda2..325c9f193432a 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -116,5 +116,4 @@ int prog_reuseport(struct sk_reuseport_md *reuse) return verdict; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index 7449fdb1763b1..36a707e7c7a7a 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -73,4 +73,3 @@ int oncpu(struct random_urandom_args *args) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index adc83a54c3527..2c5c602c60114 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -255,4 +255,3 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 94f50f7e94d64..3ded052807576 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -16,7 +16,6 @@ #include "test_tcpbpf.h" struct tcpbpf_globals global = {}; -int _version SEC("version") = 1; /** * SOL_TCP is defined in while diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 24e9344994ef9..540181c115a85 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -28,8 +28,6 @@ struct { __type(value, __u32); } perf_event_map SEC(".maps"); -int _version SEC("version") = 
1; - SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 4b825ee122cf8..ce6974016f53f 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -23,4 +23,3 @@ int oncpu(struct sched_switch_args *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index e7b6731174364..ef0dde83b85ac 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -26,8 +26,6 @@ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ } while (0) -int _version SEC("version") = 1; - struct geneve_opt { __be16 opt_class; __u8 type; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index e6aa2fc6ce6bd..d7a9a74b72453 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -20,8 +20,6 @@ #include #include "test_iptunnel_common.h" -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index 27eb52dda92c2..c98fb44156f04 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -16,8 +16,6 @@ #include #include "test_iptunnel_common.h" -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index a5337cd9400be..b778cad454852 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -12,8 +12,6 @@ #include #include -int _version SEC("version") = 1; - SEC("redirect_to_111") int xdp_redirect_to_111(struct xdp_md *xdp) { From 7e3cbd3405cb7b6c036b8984baa694bc55c08e46 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Fri, 8 Oct 2021 10:31:39 -0700 Subject: [PATCH 035/181] selftests/bpf: Fix btf_dump test under new clang New clang version changed ([0]) type name in dwarf from "long int" to "long", this is causing btf_dump tests to fail. 
[0] https://github.com/llvm/llvm-project/commit/f6a561c4d6754b13165a49990e8365d819f64c86 Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211008173139.1457407-1-fallentree@fb.com --- .../selftests/bpf/progs/btf_dump_test_case_bitfields.c | 10 +++++----- .../selftests/bpf/progs/btf_dump_test_case_packing.c | 4 ++-- .../selftests/bpf/progs/btf_dump_test_case_padding.c | 2 +- .../selftests/bpf/progs/btf_dump_test_case_syntax.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c index 8f44767a75fa5..e5560a6560309 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -11,7 +11,7 @@ /* *struct bitfields_only_mixed_types { * int a: 3; - * long int b: 2; + * long b: 2; * _Bool c: 1; * enum { * A = 0, @@ -27,7 +27,7 @@ struct bitfields_only_mixed_types { int a: 3; - long int b: 2; + long b: 2; bool c: 1; /* it's really a _Bool type */ enum { A, /* A = 0, dumper is very explicit */ @@ -44,8 +44,8 @@ struct bitfields_only_mixed_types { * char: 4; * int a: 4; * short b; - * long int c; - * long int d: 8; + * long c; + * long d: 8; * int e; * int f; *}; @@ -71,7 +71,7 @@ struct bitfield_mixed_with_others { *struct bitfield_flushed { * int a: 4; * long: 60; - * long int b: 16; + * long b: 16; *}; * */ diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c index 1cef3bec1dc7f..e304b6204bd9d 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -29,7 +29,7 @@ struct non_packed_fields { struct nested_packed { char: 4; int a: 4; - long int b; + long b; struct { char c; int d; @@ -44,7 +44,7 @@ union union_is_never_packed { union union_does_not_need_packing { struct { - long int a; + long a; int b; } __attribute__((packed)); int c; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index 35c512818a56b..f2661c8d2d900 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -9,7 +9,7 @@ /* ----- START-EXPECTED-OUTPUT ----- */ struct padded_implicitly { int a; - long int b; + long b; char c; }; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 8aaa24a003220..1c7105fcae3c4 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -189,7 +189,7 @@ struct struct_with_embedded_stuff { const char *d; } e; union { - volatile long int f; + volatile long f; void * restrict g; }; }; From b79c2ce3baa99beea7f8410ce3154cc23e26dbd8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:27 +0100 Subject: [PATCH 036/181] libbpf: Skip re-installing headers file if source is older than target The "install_headers" target in libbpf's Makefile would unconditionally export all API headers to the target directory. 
When those headers are installed to compile another application, this means that make always finds newer dependencies for the source files relying on those headers, and deduces that the targets should be rebuilt. Avoid that by making "install_headers" depend on the source header files, and (re-)install them only when necessary. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-2-quentin@isovalent.com --- tools/lib/bpf/Makefile | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 41e4f78dbad5b..9c6804ca5b451 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -241,15 +241,24 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ - bpf_helpers.h $(BPF_GENERATED) bpf_tracing.h \ - bpf_endian.h bpf_core_read.h skel_internal.h \ - libbpf_version.h - -install_headers: $(BPF_GENERATED) - $(call QUIET_INSTALL, headers) \ - $(foreach hdr,$(INSTALL_HEADERS), \ - $(call do_install,$(hdr),$(prefix)/include/bpf,644);) +SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ + bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ + skel_internal.h libbpf_version.h +GEN_HDRS := $(BPF_GENERATED) + +INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf +INSTALL_SRC_HDRS := $(addprefix $(INSTALL_PFX)/, $(SRC_HDRS)) +INSTALL_GEN_HDRS := $(addprefix $(INSTALL_PFX)/, $(notdir $(GEN_HDRS))) + +$(INSTALL_SRC_HDRS): $(INSTALL_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(prefix)/include/bpf,644) + +$(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(prefix)/include/bpf,644) + +install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ From c66a248f1950d41502fb67624147281d9de0e868 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:28 +0100 Subject: [PATCH 037/181] bpftool: Remove unused includes to It seems that the header file was never necessary to compile bpftool, and it is not part of the headers exported from libbpf. Let's remove the includes from prog.c and gen.c. 
Fixes: d510296d331a ("bpftool: Use syscall/loader program in "prog load" and "gen skeleton" command.") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-3-quentin@isovalent.com --- tools/bpf/bpftool/gen.c | 1 - tools/bpf/bpftool/prog.c | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index cc835859465b6..b2ffc18eafc18 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "json_writer.h" #include "main.h" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a24ea7e26aa43..277d51c4c5d98 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "cfg.h" From f012ade10b34c461663bc3dd957636be06804b0d Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:29 +0100 Subject: [PATCH 038/181] bpftool: Install libbpf headers instead of including the dir Bpftool relies on libbpf, therefore it relies on a number of headers from the library and must be linked against the library. The Makefile for bpftool exposes these objects by adding tools/lib as an include directory ("-I$(srctree)/tools/lib"). This is a working solution, but this is not the cleanest one. The risk is to involuntarily include objects that are not intended to be exposed by the libbpf. The headers needed to compile bpftool should in fact be "installed" from libbpf, with its "install_headers" Makefile target. In addition, there is one header which is internal to the library and not supposed to be used by external applications, but that bpftool uses anyway. Adjust the Makefile in order to install the header files properly before compiling bpftool. Also copy the additional internal header file (nlattr.h), but call it out explicitly. Build (and install headers) in a subdirectory under bpftool/ instead of tools/lib/bpf/. When descending from a parent Makefile, this is configurable by setting the OUTPUT, LIBBPF_OUTPUT and LIBBPF_DESTDIR variables. Also adjust the Makefile for BPF selftests, so as to reuse the (host) libbpf compiled earlier and to avoid compiling a separate version of the library just for bpftool. 
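In source terms this changes nothing for the exported API (illustrative, not part of the diff): with -I$(LIBBPF_INCLUDE) on the compiler command line, bpftool keeps spelling its includes the installed way, e.g.

    #include <bpf/libbpf.h>
    #include <bpf/btf.h>

only now they resolve against the headers exported by libbpf's own "install_headers" target rather than against the library's source tree. The internal nlattr.h, which install_headers does not export, is copied into the same bpf/ include directory by the new Makefile rule, so an include along the lines of <bpf/nlattr.h> (spelling assumed here) keeps resolving as well.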
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-4-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 33 ++++++++++++++++++---------- tools/testing/selftests/bpf/Makefile | 2 ++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 1fcf5b01a193c..ba02d71c39efd 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -17,19 +17,23 @@ endif BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ - LIBBPF_PATH = $(LIBBPF_OUTPUT) - BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/ + _OUTPUT := $(OUTPUT) else - LIBBPF_OUTPUT = - LIBBPF_PATH = $(BPF_DIR) - BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/ + _OUTPUT := $(CURDIR) endif +BOOTSTRAP_OUTPUT := $(_OUTPUT)/bootstrap/ +LIBBPF_OUTPUT := $(_OUTPUT)/libbpf/ +LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include -LIBBPF = $(LIBBPF_PATH)libbpf.a +LIBBPF = $(LIBBPF_OUTPUT)libbpf.a LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a +# We need to copy nlattr.h which is not otherwise exported by libbpf, but still +# required by bpftool. +LIBBPF_INTERNAL_HDRS := nlattr.h + ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) endif @@ -38,7 +42,13 @@ $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): $(QUIET_MKDIR)mkdir -p $@ $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) - $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers + +$(LIBBPF_INCLUDE)/bpf/$(LIBBPF_INTERNAL_HDRS): \ + $(addprefix $(BPF_DIR),$(LIBBPF_INTERNAL_HDRS)) $(LIBBPF) + $(call QUIET_INSTALL, bpf/$(notdir $@)) + $(Q)install -m 644 -t $(LIBBPF_INCLUDE)/bpf/ $(BPF_DIR)$(notdir $@) $(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ @@ -60,10 +70,10 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(if $(OUTPUT),$(OUTPUT),.) \ + -I$(LIBBPF_INCLUDE) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(srctree)/tools/lib \ -I$(srctree)/tools/perf CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) @@ -140,7 +150,7 @@ BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o g $(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -$(OBJS): $(LIBBPF) +$(OBJS): $(LIBBPF) $(LIBBPF_INCLUDE)/bpf/$(LIBBPF_INTERNAL_HDRS) VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -167,8 +177,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(if $(OUTPUT),$(OUTPUT),.) 
\ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_PATH) \ - -I$(srctree)/tools/lib \ + -I$(LIBBPF_INCLUDE) \ -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c5c9a9f50d8d0..849a4637f59d2 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -209,6 +209,8 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ + LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ + LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install all: docs From 1478994aad82810d833bf9c816fb4e9845553e9b Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:30 +0100 Subject: [PATCH 039/181] tools/resolve_btfids: Install libbpf headers when building API headers from libbpf should not be accessed directly from the library's source directory. Instead, they should be exported with "make install_headers". Let's make sure that resolve_btfids installs the headers properly when building. When descending from a parent Makefile, the specific output directories for building the library and exporting the headers are configurable with LIBBPF_OUT and LIBBPF_DESTDIR, respectively. This is in addition to OUTPUT, on top of which those variables are constructed by default. Also adjust the Makefile for the BPF selftests in order to point to the (target) libbpf shared with other tools, instead of building a version specific to resolve_btfids. Remove libbpf's order-only dependencies on the include directories (they are created by libbpf and don't need to exist beforehand). 
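The matching change in main.c is simply a move from the library's source-tree include style to the installed style; roughly, with the exact spellings assumed here for illustration:

    -#include <btf.h>        /* previously found via -I$(LIBBPF_SRC)   */
    -#include <libbpf.h>
    +#include <bpf/btf.h>    /* now found via -I$(LIBBPF_INCLUDE)      */
    +#include <bpf/libbpf.h>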
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-5-quentin@isovalent.com --- tools/bpf/resolve_btfids/Makefile | 16 +++++++++++----- tools/bpf/resolve_btfids/main.c | 4 ++-- tools/testing/selftests/bpf/Makefile | 7 +++++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 08b75e314ae7e..751643f860b29 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -29,25 +29,30 @@ BPFOBJ := $(OUTPUT)/libbpf/libbpf.a LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/ SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a +LIBBPF_DESTDIR := $(LIBBPF_OUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)include + BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) -$(OUTPUT) $(OUTPUT)/libbpf $(OUTPUT)/libsubcmd: +$(OUTPUT) $(OUTPUT)/libsubcmd $(LIBBPF_OUT): $(call msg,MKDIR,,$@) $(Q)mkdir -p $(@) $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) $(abspath $@) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $(abspath $@) install_headers CFLAGS := -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(LIBBPF_SRC) \ + -I$(LIBBPF_INCLUDE) \ -I$(SUBCMD_SRC) LIBS = -lelf -lz @@ -65,7 +70,8 @@ $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \ $(OUTPUT)/.*.o.d \ - $(OUTPUT)/libbpf \ + $(LIBBPF_OUT) \ + $(LIBBPF_DESTDIR) \ $(OUTPUT)/libsubcmd \ $(OUTPUT)/resolve_btfids) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index c6c3e613858ac..716e6ad1864b8 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -60,8 +60,8 @@ #include #include #include -#include -#include +#include +#include #include #define BTF_IDS_SECTION ".BTF_ids" diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 849a4637f59d2..090f424ac5e15 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -122,9 +122,11 @@ BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a ifneq ($(CROSS_COMPILE),) HOST_BUILD_DIR := $(BUILD_DIR)/host HOST_SCRATCH_DIR := $(OUTPUT)/host-tools +HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include else HOST_BUILD_DIR := $(BUILD_DIR) HOST_SCRATCH_DIR := $(SCRATCH_DIR) +HOST_INCLUDE_DIR := $(INCLUDE_DIR) endif HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids @@ -227,7 +229,7 @@ docs-clean: $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ - | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf + | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers @@ -235,7 +237,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ - | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf + | $(HOST_BUILD_DIR)/libbpf 
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ @@ -260,6 +262,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by From be79505caf3f99a2f9cca5946261085b333f7034 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:31 +0100 Subject: [PATCH 040/181] tools/runqslower: Install libbpf headers when building API headers from libbpf should not be accessed directly from the library's source directory. Instead, they should be exported with "make install_headers". Let's make sure that runqslower installs the headers properly when building. We use a libbpf_hdrs target to mark the logical dependency on libbpf's headers export for a number of object files, even though the headers should have been exported at this time (since bpftool needs them, and is required to generate the skeleton or the vmlinux.h). When descending from a parent Makefile, the specific output directories for building the library and exporting the headers are configurable with BPFOBJ_OUTPUT and BPF_DESTDIR, respectively. This is in addition to OUTPUT, on top of which those variables are constructed by default. Also adjust the Makefile for the BPF selftests. We pass a number of variables to the "make" invocation, because we want to point runqslower to the (target) libbpf shared with other tools, instead of building its own version. In addition, runqslower relies on (target) bpftool, and we also want to pass the proper variables to its Makefile so that bpftool itself reuses the same libbpf. 
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-6-quentin@isovalent.com --- tools/bpf/runqslower/Makefile | 22 +++++++++++++--------- tools/testing/selftests/bpf/Makefile | 15 +++++++++------ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 3818ec511fd25..bbd1150578f7a 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -9,9 +9,9 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a -BPF_INCLUDE := $(BPFOBJ_OUTPUT) -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \ - -I$(abspath ../../include/uapi) +BPF_DESTDIR := $(BPFOBJ_OUTPUT) +BPF_INCLUDE := $(BPF_DESTDIR)/include +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) CFLAGS := -g -Wall # Try to detect best kernel BTF source @@ -33,7 +33,7 @@ endif .DELETE_ON_ERROR: -.PHONY: all clean runqslower +.PHONY: all clean runqslower libbpf_hdrs all: runqslower runqslower: $(OUTPUT)/runqslower @@ -46,13 +46,15 @@ clean: $(Q)$(RM) $(OUTPUT)runqslower $(Q)$(RM) -r .output +libbpf_hdrs: $(BPFOBJ) + $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ - $(OUTPUT)/runqslower.bpf.o + $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs -$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ @@ -81,8 +83,10 @@ else endif $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ + DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) +$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - CC=$(HOSTCC) LD=$(HOSTLD) + LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT) \ + LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 090f424ac5e15..e023d734f7b0a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -130,6 +130,7 @@ HOST_INCLUDE_DIR := $(INCLUDE_DIR) endif HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids +RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -154,7 +155,7 @@ $(notdir $(TEST_GEN_PROGS) \ # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ - $(INCLUDE_DIR)) + $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) $(Q)mkdir -p $@ @@ -183,11 +184,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool -$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) - $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ - OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFOBJ=$(BPFOBJ) 
BPF_INCLUDE=$(INCLUDE_DIR) && \ - cp $(SCRATCH_DIR)/runqslower $@ +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ + BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/ \ + BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ + cp $(RUNQSLOWER_OUTPUT)runqslower $@ TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) From bf60791741d430e8a3e2f8b4a3941d392bf838c2 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:32 +0100 Subject: [PATCH 041/181] bpf: preload: Install libbpf headers when building API headers from libbpf should not be accessed directly from the library's source directory. Instead, they should be exported with "make install_headers". Let's make sure that bpf/preload/Makefile installs the headers properly when building. Note that we declare an additional dependency for iterators/iterators.o: having $(LIBBPF_A) as a dependency to "$(obj)/bpf_preload_umd" is not sufficient, as it makes it required only at the linking step. But we need libbpf to be compiled, and in particular its headers to be exported, before we attempt to compile iterators.o. The issue would not occur before this commit, because libbpf's headers were not exported and were always available under tools/lib/bpf. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-7-quentin@isovalent.com --- kernel/bpf/preload/Makefile | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 1951332dd15f5..469d35e890ebc 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -1,21 +1,36 @@ # SPDX-License-Identifier: GPL-2.0 LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ -LIBBPF_A = $(obj)/libbpf.a -LIBBPF_OUT = $(abspath $(obj)) +LIBBPF_OUT = $(abspath $(obj))/libbpf +LIBBPF_A = $(LIBBPF_OUT)/libbpf.a +LIBBPF_DESTDIR = $(LIBBPF_OUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include # Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test # in tools/scripts/Makefile.include always succeeds when building the kernel # with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". -$(LIBBPF_A): - $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a +$(LIBBPF_A): | $(LIBBPF_OUT) + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $(LIBBPF_OUT)/libbpf.a install_headers + +libbpf_hdrs: $(LIBBPF_A) + +.PHONY: libbpf_hdrs + +$(LIBBPF_OUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ - -I $(srctree)/tools/lib/ -Wno-unused-result + -I $(LIBBPF_INCLUDE) -Wno-unused-result userprogs := bpf_preload_umd clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/ +clean-files += $(LIBBPF_OUT) $(LIBBPF_DESTDIR) + +$(obj)/iterators/iterators.o: | libbpf_hdrs bpf_preload_umd-objs := iterators/iterators.o bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz From 7bf731dcc641f7d7c71d1932678e0de8ea472612 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:33 +0100 Subject: [PATCH 042/181] bpf: iterators: Install libbpf headers when building API headers from libbpf should not be accessed directly from the library's source directory. 
Instead, they should be exported with "make install_headers". Let's make sure that bpf/preload/iterators/Makefile installs the headers properly when building. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-8-quentin@isovalent.com --- kernel/bpf/preload/iterators/Makefile | 38 ++++++++++++++++++--------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index 28fa8c1440f46..a4aedc7b0728c 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -1,18 +1,26 @@ # SPDX-License-Identifier: GPL-2.0 OUTPUT := .output +abs_out := $(abspath $(OUTPUT)) + CLANG ?= clang LLC ?= llc LLVM_STRIP ?= llvm-strip + +TOOLS_PATH := $(abspath ../../../../tools) +BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool +BPFTOOL_OUTPUT := $(abs_out)/bpftool DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) -LIBBPF_SRC := $(abspath ../../../../tools/lib/bpf) -BPFOBJ := $(OUTPUT)/libbpf.a -BPF_INCLUDE := $(OUTPUT) -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../../tools/lib) \ - -I$(abspath ../../../../tools/include/uapi) + +LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf +LIBBPF_OUTPUT := $(abs_out)/libbpf +LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include +BPFOBJ := $(LIBBPF_OUTPUT)/libbpf.a + +INCLUDES := -I$(OUTPUT) -I$(LIBBPF_INCLUDE) -I$(TOOLS_PATH)/include/uapi CFLAGS := -g -Wall -abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = msg = @@ -44,14 +52,18 @@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT) -c $(filter %.c,$^) -o $@ && \ $(LLVM_STRIP) -g $@ -$(OUTPUT): +$(OUTPUT) $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): $(call msg,MKDIR,$@) - $(Q)mkdir -p $(OUTPUT) + $(Q)mkdir -p $@ -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + OUTPUT=$(abspath $(dir $@))/ prefix= \ + DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): - $(Q)$(MAKE) $(submake_extras) -C ../../../../tools/bpf/bpftool \ - prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install +$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ + OUTPUT=$(BPFTOOL_OUTPUT)/ \ + LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ + LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \ + prefix= DESTDIR=$(abs_out)/ install From 62fde1c8beaf743ea1ab437757ede358f279515d Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:34 +0100 Subject: [PATCH 043/181] samples/bpf: Update .gitignore Update samples/bpf/.gitignore to ignore files generated when building the samples. Add: - vmlinux.h - the generated skeleton files (*.skel.h) - the samples/bpf/libbpf/ and .../bpftool/ directories, in preparation of a future commit which introduces a local output directory for building libbpf and bpftool. 
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-9-quentin@isovalent.com --- samples/bpf/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index fcba217f0ae2a..0e7bfdbff80a6 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -57,3 +57,7 @@ testfile.img hbm_out.log iperf.* *.out +*.skel.h +/vmlinux.h +/bpftool/ +/libbpf/ From 3f7a3318a7c60947e27be372950840a5eab976d0 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:35 +0100 Subject: [PATCH 044/181] samples/bpf: Install libbpf headers when building API headers from libbpf should not be accessed directly from the source directory. Instead, they should be exported with "make install_headers". Make sure that samples/bpf/Makefile installs the headers properly when building. The object compiled from and exported by libbpf are now placed into a subdirectory of sample/bpf/ instead of remaining in tools/lib/bpf/. We attempt to remove this directory on "make clean". However, the "clean" target re-enters the samples/bpf/ directory from the root of the repository ("$(MAKE) -C ../../ M=$(CURDIR) clean"), in such a way that $(srctree) and $(src) are not defined, making it impossible to use $(LIBBPF_OUTPUT) and $(LIBBPF_DESTDIR) in the recipe. So we only attempt to clean $(CURDIR)/libbpf, which is the default value. Add a dependency on libbpf's headers for the $(TRACE_HELPERS). We also change the output directory for bpftool, to place the generated objects under samples/bpf/bpftool/ instead of building in bpftool's directory directly. Doing so, we make sure bpftool reuses the libbpf library previously compiled and installed. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-10-quentin@isovalent.com --- samples/bpf/Makefile | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a5783749ec150..8c5311d776804 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -61,7 +61,11 @@ tprogs-y += xdp_redirect tprogs-y += xdp_monitor # Libbpf dependencies -LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a +LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf +LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf +LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include +LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o @@ -200,7 +204,7 @@ TPROGS_CFLAGS += -Wstrict-prototypes TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ -TPROGS_CFLAGS += -I$(srctree)/tools/lib/ +TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include TPROGS_CFLAGS += -I$(srctree)/tools/perf TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 @@ -270,16 +274,27 @@ all: clean: $(MAKE) -C ../../ M=$(CURDIR) clean @find $(CURDIR) -type f -name '*~' -delete + @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool -$(LIBBPF): FORCE +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ - LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O= + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + LDFLAGS=$(TPROGS_LDFLAGS) 
srctree=$(BPF_SAMPLES_PATH)/../../ \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $@ install_headers BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool -BPFTOOL := $(BPFTOOLDIR)/bpftool -$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) - $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ +BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool +BPFTOOL := $(BPFTOOL_OUTPUT)/bpftool +$(BPFTOOL): $(LIBBPF) $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) + $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \ + OUTPUT=$(BPFTOOL_OUTPUT)/ \ + LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ + LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ + +$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ $(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE $(call filechk,offsets,__SYSCALL_NRS_H__) @@ -311,6 +326,11 @@ verify_target_bpf: verify_cmds $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) +libbpf_hdrs: $(LIBBPF) +$(obj)/$(TRACE_HELPERS): | libbpf_hdrs + +.PHONY: libbpf_hdrs + $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h @@ -369,7 +389,7 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \ -Wno-compare-distinct-pointer-types -I$(srctree)/include \ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \ - -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ @@ -406,7 +426,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(srctree)/tools/lib/ \ + -I$(LIBBPF_INCLUDE) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ From a60d24e7400247bf21cac451f28961ac800756c6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:36 +0100 Subject: [PATCH 045/181] samples/bpf: Do not FORCE-recompile libbpf In samples/bpf/Makefile, libbpf has a FORCE dependency that force it to be rebuilt. I read this as a way to keep the library up-to-date, given that we do not have, in samples/bpf, a list of the source files for libbpf itself. However, a better approach would be to use the "$(wildcard ...)" function from make, and to have libbpf depend on all the .c and .h files in its directory. This is what samples/bpf/Makefile does for bpftool, and also what the BPF selftests' Makefile does for libbpf. Let's update the Makefile to avoid rebuilding libbpf all the time (and bpftool on top of it). 
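For reference, the general pattern is sketched below with made-up directory and target names; make then rebuilds the archive only when one of the listed sources is newer than it, instead of on every invocation as FORCE causes:

    # Illustrative stand-alone rule, not part of this patch
    LIB_SRC := lib
    libfoo.a: $(wildcard $(LIB_SRC)/*.[ch] $(LIB_SRC)/Makefile)
	$(MAKE) -C $(LIB_SRC) $(abspath $@)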
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-11-quentin@isovalent.com --- samples/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8c5311d776804..4c5ad15f8d288 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -276,7 +276,7 @@ clean: @find $(CURDIR) -type f -name '*~' -delete @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool -$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) +$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \ From 87ee33bfdd4f74edc1548c7f0140800cfcc33039 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:37 +0100 Subject: [PATCH 046/181] selftests/bpf: Better clean up for runqslower in test_bpftool_build.sh The script test_bpftool_build.sh attempts to build bpftool in the various supported ways, to make sure nothing breaks. One of those ways is to run "make tools/bpf" from the root of the kernel repository. This command builds bpftool, along with the other tools under tools/bpf, and runqslower in particular. After running the command and upon a successful bpftool build, the script attempts to cleanup the generated objects. However, after building with this target and in the case of runqslower, the files are not cleaned up as expected. This is because the "tools/bpf" target sets $(OUTPUT) to .../tools/bpf/runqslower/ when building the tool, causing the object files to be placed directly under the runqslower directory. But when running "cd tools/bpf; make clean", the value for $(OUTPUT) is set to ".output" (relative to the runqslower directory) by runqslower's Makefile, and this is where the Makefile looks for files to clean up. We cannot easily fix in the root Makefile (where "tools/bpf" is defined) or in tools/scripts/Makefile.include (setting $(OUTPUT)), where changing the way the output variables are passed would likely have consequences elsewhere. We could change runqslower's Makefile to build in the repository instead of in a dedicated ".output/", but doing so just to accommodate a test script doesn't sound great. Instead, let's just make sure that we clean up runqslower properly by adding the correct command to the script. This will attempt to clean runqslower twice: the first try with command "cd tools/bpf; make clean" will search for tools/bpf/runqslower/.output and fail to clean it (but will still clean the other tools, in particular bpftool), the second one (added in this commit) sets the $(OUTPUT) variable like for building with the "tool/bpf" target and should succeed. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-12-quentin@isovalent.com --- tools/testing/selftests/bpf/test_bpftool_build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index b03a875715920..1453a53ed547d 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -90,6 +90,10 @@ echo -e "... 
through kbuild\n" if [ -f ".config" ] ; then make_and_clean tools/bpf + ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for + ## runqslower, but the default (used for the "clean" target) is .output. + ## Let's make sure we clean runqslower's directory properly. + make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed ## down from toplevel Makefile to bpftool's Makefile. From d7db0a4e8d95101ebb545444578ba7085c270e5f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Oct 2021 20:44:38 +0100 Subject: [PATCH 047/181] bpftool: Add install-bin target to install binary only With "make install", bpftool installs its binary and its bash completion file. Usually, this is what we want. But a few components in the kernel repository (namely, BPF iterators and selftests) also install bpftool locally before using it. In such a case, bash completion is not necessary and is just a useless build artifact. Let's add an "install-bin" target to bpftool, to offer a way to install the binary only. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211007194438.34443-13-quentin@isovalent.com --- kernel/bpf/preload/iterators/Makefile | 2 +- tools/bpf/bpftool/Makefile | 6 ++++-- tools/testing/selftests/bpf/Makefile | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index a4aedc7b0728c..b8bd605112275 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -66,4 +66,4 @@ $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) OUTPUT=$(BPFTOOL_OUTPUT)/ \ LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \ - prefix= DESTDIR=$(abs_out)/ install + prefix= DESTDIR=$(abs_out)/ install-bin diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ba02d71c39efd..9c2d13c513f0e 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -226,10 +226,12 @@ clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool $(Q)$(RM) -r -- $(OUTPUT)feature/ -install: $(OUTPUT)bpftool +install-bin: $(OUTPUT)bpftool $(call QUIET_INSTALL, bpftool) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + +install: install-bin $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) @@ -256,6 +258,6 @@ zdep: @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi .SECONDARY: -.PHONY: all FORCE clean install uninstall zdep +.PHONY: all FORCE clean install-bin install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e023d734f7b0a..498222543c374 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -216,7 +216,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ - prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin all: docs From 65223741ae1b759a14cab84ba88888bb025f816d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Oct 2021 17:48:55 +0800 
Subject: [PATCH 048/181] bpf: Support writable context for bare tracepoint Commit 9df1c28bb752 ("bpf: add writable context for raw tracepoints") supports writable context for tracepoint, but it misses the support for bare tracepoint which has no associated trace event. Bare tracepoint is defined by DECLARE_TRACE(), so adding a corresponding DECLARE_TRACE_WRITABLE() macro to generate a definition in __bpf_raw_tp_map section for bare tracepoint in a similar way to DEFINE_TRACE_WRITABLE(). Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211004094857.30868-2-hotforest@gmail.com --- include/trace/bpf_probe.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a23be89119aa5..a8e97f84b6527 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -93,8 +93,7 @@ __section("__bpf_raw_tp_map") = { \ #define FIRST(x, ...) x -#undef DEFINE_EVENT_WRITABLE -#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +#define __CHECK_WRITABLE_BUF_SIZE(call, proto, args, size) \ static inline void bpf_test_buffer_##call(void) \ { \ /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ @@ -103,8 +102,12 @@ static inline void bpf_test_buffer_##call(void) \ */ \ FIRST(proto); \ (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ -} \ -__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) +} + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ + __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -119,9 +122,17 @@ __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0) +#undef DECLARE_TRACE_WRITABLE +#define DECLARE_TRACE_WRITABLE(call, proto, args, size) \ + __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ + __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ + __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), size) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef DECLARE_TRACE_WRITABLE #undef DEFINE_EVENT_WRITABLE +#undef __CHECK_WRITABLE_BUF_SIZE #undef __DEFINE_EVENT #undef FIRST From ccaf12d6215a56836472db220520cda8024d6c4f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Oct 2021 17:48:56 +0800 Subject: [PATCH 049/181] libbpf: Support detecting and attaching of writable tracepoint program Program on writable tracepoint is BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, but its attachment is the same as BPF_PROG_TYPE_RAW_TRACEPOINT. 
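For illustration, a program for a writable tracepoint only differs in its section name; the tracepoint "foo" and its context struct below are hypothetical, not part of this patch:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    struct foo_ctx {
    	int val;		/* layout of the hypothetical writable context */
    };

    SEC("raw_tp.w/foo")		/* "raw_tracepoint.w/foo" works as well */
    int BPF_PROG(handle_foo, struct foo_ctx *ctx)
    {
    	ctx->val = 1;		/* writable: the kernel sees this update */
    	return 0;
    }

    char _license[] SEC("license") = "GPL";

Attachment then goes through the same bpf_program__attach_raw_tracepoint() path as the non-writable raw_tp variants, as shown in the diff below.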
Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211004094857.30868-3-hotforest@gmail.com --- tools/lib/bpf/libbpf.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ed313fd491bd2..ae0889bebe329 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8077,6 +8077,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), @@ -9846,12 +9848,26 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) { - const char *tp_name; + static const char *const prefixes[] = { + "raw_tp/", + "raw_tracepoint/", + "raw_tp.w/", + "raw_tracepoint.w/", + }; + size_t i; + const char *tp_name = NULL; - if (str_has_pfx(prog->sec_name, "raw_tp/")) - tp_name = prog->sec_name + sizeof("raw_tp/") - 1; - else - tp_name = prog->sec_name + sizeof("raw_tracepoint/") - 1; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { + if (str_has_pfx(prog->sec_name, prefixes[i])) { + tp_name = prog->sec_name + strlen(prefixes[i]); + break; + } + } + if (!tp_name) { + pr_warn("prog '%s': invalid section name '%s'\n", + prog->name, prog->sec_name); + return libbpf_err_ptr(-EINVAL); + } return bpf_program__attach_raw_tracepoint(prog, tp_name); } From fa7f17d066bd0996b930b664aa0ed1f213fc1828 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Oct 2021 17:48:57 +0800 Subject: [PATCH 050/181] bpf/selftests: Add test for writable bare tracepoint Add a writable bare tracepoint in bpf_testmod module, and trigger its calling when reading /sys/kernel/bpf_testmod with a specific buffer length. The reading will return the value in writable context if the early return flag is enabled in writable context. 
Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211004094857.30868-4-hotforest@gmail.com --- .../bpf/bpf_testmod/bpf_testmod-events.h | 15 ++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.c | 10 ++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.h | 5 +++ .../selftests/bpf/prog_tests/module_attach.c | 35 +++++++++++++++++++ .../selftests/bpf/progs/test_module_attach.c | 14 ++++++++ tools/testing/selftests/bpf/test_progs.c | 4 +-- tools/testing/selftests/bpf/test_progs.h | 2 ++ 7 files changed, 83 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 89c6d58e5dd68..11ee801e75e7e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -34,6 +34,21 @@ DECLARE_TRACE(bpf_testmod_test_write_bare, TP_ARGS(task, ctx) ); +#undef BPF_TESTMOD_DECLARE_TRACE +#ifdef DECLARE_TRACE_WRITABLE +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) +#else +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare, + TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx), + TP_ARGS(ctx), + sizeof(struct bpf_testmod_test_writable_ctx) +); + #endif /* _BPF_TESTMOD_EVENTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index b892948dc134c..5d52ea2768df4 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -50,6 +50,16 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_loop_test(101) > 100) trace_bpf_testmod_test_read(current, &ctx); + /* Magic number to enable writable tp */ + if (len == 64) { + struct bpf_testmod_test_writable_ctx writable = { + .val = 1024, + }; + trace_bpf_testmod_test_writable_bare(&writable); + if (writable.early_ret) + return snprintf(buf, len, "%d\n", writable.val); + } + return -EIO; /* always fail */ } EXPORT_SYMBOL(bpf_testmod_test_read); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index b3892dc401113..0d71e2607832e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -17,4 +17,9 @@ struct bpf_testmod_test_write_ctx { size_t len; }; +struct bpf_testmod_test_writable_ctx { + bool early_ret; + int val; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 1797a6e4d6d84..6d0e50dcf47cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -2,10 +2,36 @@ /* Copyright (c) 2020 Facebook */ #include +#include #include "test_module_attach.skel.h" static int duration; +static int trigger_module_test_writable(int *val) +{ + int fd, err; + char buf[65]; + ssize_t rd; + + fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); + err = -errno; + if (!ASSERT_GE(fd, 0, "testmode_file_open")) + return err; + + rd = read(fd, buf, sizeof(buf) - 1); + err = -errno; + if (!ASSERT_GT(rd, 0, 
"testmod_file_rd_val")) { + close(fd); + return err; + } + + buf[rd] = '\0'; + *val = strtol(buf, NULL, 0); + close(fd); + + return 0; +} + static int delete_module(const char *name, int flags) { return syscall(__NR_delete_module, name, flags); @@ -19,6 +45,7 @@ void test_module_attach(void) struct test_module_attach__bss *bss; struct bpf_link *link; int err; + int writable_val = 0; skel = test_module_attach__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -51,6 +78,14 @@ void test_module_attach(void) ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); + bss->raw_tp_writable_bare_early_ret = true; + bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4; + ASSERT_OK(trigger_module_test_writable(&writable_val), + "trigger_writable"); + ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in"); + ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val, + "writable_test_out"); + test_module_attach__detach(skel); /* attach fentry/fexit and make sure it get's module reference */ diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index bd37ceec55877..b36857093f71f 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -27,6 +27,20 @@ int BPF_PROG(handle_raw_tp_bare, return 0; } +int raw_tp_writable_bare_in_val = 0; +int raw_tp_writable_bare_early_ret = 0; +int raw_tp_writable_bare_out_val = 0; + +SEC("raw_tp.w/bpf_testmod_test_writable_bare") +int BPF_PROG(handle_raw_tp_writable_bare, + struct bpf_testmod_test_writable_ctx *writable) +{ + raw_tp_writable_bare_in_val = writable->val; + writable->early_ret = raw_tp_writable_bare_early_ret; + writable->val = raw_tp_writable_bare_out_val; + return 0; +} + __u32 tp_btf_read_sz = 0; SEC("tp_btf/bpf_testmod_test_read") diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2ed01f615d20f..007b4ff85fea7 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -747,7 +747,7 @@ int trigger_module_test_read(int read_sz) { int fd, err; - fd = open("/sys/kernel/bpf_testmod", O_RDONLY); + fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); err = -errno; if (!ASSERT_GE(fd, 0, "testmod_file_open")) return err; @@ -769,7 +769,7 @@ int trigger_module_test_write(int write_sz) memset(buf, 'a', write_sz); buf[write_sz-1] = '\0'; - fd = open("/sys/kernel/bpf_testmod", O_WRONLY); + fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY); err = -errno; if (!ASSERT_GE(fd, 0, "testmod_file_open")) { free(buf); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 94bef0aa74cf5..9b8a1810b700a 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -301,3 +301,5 @@ int trigger_module_test_write(int write_sz); #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif + +#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" From 91b2c0afd00cb01d715d6e9503624ab33580d5b6 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:06 -0700 Subject: [PATCH 051/181] selftests/bpf: Add parallelism to test_progs This patch adds "-j" mode to test_progs, executing tests in multiple process. "-j" mode is optional, and works with all existing test selection mechanism, as well as "-v", "-l" etc. 
In "-j" mode, main process use UDS/SEQPACKET to communicate to each forked worker, commanding it to run tests and collect logs. After all tests are finished, a summary is printed. main process use multiple competing threads to dispatch work to worker, trying to keep them all busy. The test status will be printed as soon as it is finished, if there are error logs, it will be printed after the final summary line. By specifying "--debug", additional debug information on server/worker communication will be printed. Example output: > ./test_progs -n 15-20 -j [ 12.801730] bpf_testmod: loading out-of-tree module taints kernel. Launching 8 workers. #20 btf_split:OK #16 btf_endian:OK #18 btf_module:OK #17 btf_map_in_map:OK #19 btf_skc_cls_ingress:OK #15 btf_dump:OK Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-2-fallentree@fb.com --- tools/testing/selftests/bpf/test_progs.c | 611 +++++++++++++++++++++-- tools/testing/selftests/bpf/test_progs.h | 36 +- 2 files changed, 610 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 007b4ff85fea7..20dd1e2f8d063 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -12,6 +12,11 @@ #include #include /* backtrace */ #include +#include /* get_nprocs */ +#include +#include +#include +#include /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) @@ -48,6 +53,8 @@ struct prog_test_def { bool force_log; int error_cnt; int skip_cnt; + int sub_succ_cnt; + bool should_run; bool tested; bool need_cgroup_cleanup; @@ -97,6 +104,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) if (stdout == env.stdout) return; + /* worker always holds log */ + if (env.worker_id != -1) + return; + fflush(stdout); /* exports env.log_buf & env.log_cnt */ if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { @@ -107,8 +118,6 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) fprintf(env.stdout, "\n"); } } - - fseeko(stdout, 0, SEEK_SET); /* rewind */ } static void skip_account(void) @@ -124,7 +133,8 @@ static void stdio_restore(void); /* A bunch of tests set custom affinity per-thread and/or per-process. Reset * it after each test/sub-test. */ -static void reset_affinity() { +static void reset_affinity(void) +{ cpu_set_t cpuset; int i, err; @@ -165,21 +175,21 @@ static void restore_netns(void) } } -void test__end_subtest() +void test__end_subtest(void) { struct prog_test_def *test = env.test; int sub_error_cnt = test->error_cnt - test->old_error_cnt; dump_test_log(test, sub_error_cnt); - fprintf(env.stdout, "#%d/%d %s/%s:%s\n", + fprintf(stdout, "#%d/%d %s/%s:%s\n", test->test_num, test->subtest_num, test->test_name, test->subtest_name, sub_error_cnt ? "FAIL" : (test->skip_cnt ? 
"SKIP" : "OK")); if (sub_error_cnt) - env.fail_cnt++; + test->error_cnt++; else if (test->skip_cnt == 0) - env.sub_succ_cnt++; + test->sub_succ_cnt++; skip_account(); free(test->subtest_name); @@ -217,7 +227,8 @@ bool test__start_subtest(const char *name) return true; } -void test__force_log() { +void test__force_log(void) +{ env.test->force_log = true; } @@ -474,6 +485,8 @@ enum ARG_KEYS { ARG_LIST_TEST_NAMES = 'l', ARG_TEST_NAME_GLOB_ALLOWLIST = 'a', ARG_TEST_NAME_GLOB_DENYLIST = 'd', + ARG_NUM_WORKERS = 'j', + ARG_DEBUG = -1, }; static const struct argp_option opts[] = { @@ -495,6 +508,10 @@ static const struct argp_option opts[] = { "Run tests with name matching the pattern (supports '*' wildcard)." }, { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0, "Don't run tests with name matching the pattern (supports '*' wildcard)." }, + { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL, + "Number of workers to run in parallel, default to number of cpus." }, + { "debug", ARG_DEBUG, NULL, 0, + "print extra debug information for test_progs." }, {}, }; @@ -650,7 +667,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", errno); - return -1; + return -EINVAL; } } @@ -661,6 +678,20 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_LIST_TEST_NAMES: env->list_test_names = true; break; + case ARG_NUM_WORKERS: + if (arg) { + env->workers = atoi(arg); + if (!env->workers) { + fprintf(stderr, "Invalid number of worker: %s.", arg); + return -EINVAL; + } + } else { + env->workers = get_nprocs(); + } + break; + case ARG_DEBUG: + env->debug = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -678,7 +709,7 @@ static void stdio_hijack(void) env.stdout = stdout; env.stderr = stderr; - if (env.verbosity > VERBOSE_NONE) { + if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) { /* nothing to do, output to stdout by default */ return; } @@ -704,10 +735,6 @@ static void stdio_restore(void) return; fclose(stdout); - free(env.log_buf); - - env.log_buf = NULL; - env.log_cnt = 0; stdout = env.stdout; stderr = env.stderr; @@ -794,11 +821,461 @@ void crash_handler(int signum) dump_test_log(env.test, true); if (env.stdout) stdio_restore(); - + if (env.worker_id != -1) + fprintf(stderr, "[%d]: ", env.worker_id); fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +static void sigint_handler(int signum) +{ + int i; + + for (i = 0; i < env.workers; i++) + if (env.worker_socks[i] > 0) + close(env.worker_socks[i]); +} + +static int current_test_idx; +static pthread_mutex_t current_test_lock; +static pthread_mutex_t stdout_output_lock; + +struct test_result { + int error_cnt; + int skip_cnt; + int sub_succ_cnt; + + size_t log_cnt; + char *log_buf; +}; + +static struct test_result test_results[ARRAY_SIZE(prog_test_defs)]; + +static inline const char *str_msg(const struct msg *msg, char *buf) +{ + switch (msg->type) { + case MSG_DO_TEST: + sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num); + break; + case MSG_TEST_DONE: + sprintf(buf, "MSG_TEST_DONE %d (log: %d)", + msg->test_done.test_num, + msg->test_done.have_log); + break; + case MSG_TEST_LOG: + sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", + strlen(msg->test_log.log_buf), + msg->test_log.is_last); + break; + case MSG_EXIT: + sprintf(buf, "MSG_EXIT"); + break; + default: + sprintf(buf, "UNKNOWN"); + break; + } + + return buf; +} + +static int send_message(int 
sock, const struct msg *msg) +{ + char buf[256]; + + if (env.debug) + fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf)); + return send(sock, msg, sizeof(*msg), 0); +} + +static int recv_message(int sock, struct msg *msg) +{ + int ret; + char buf[256]; + + memset(msg, 0, sizeof(*msg)); + ret = recv(sock, msg, sizeof(*msg), 0); + if (ret >= 0) { + if (env.debug) + fprintf(stderr, "Received msg: %s\n", str_msg(msg, buf)); + } + return ret; +} + +static void run_one_test(int test_num) +{ + struct prog_test_def *test = &prog_test_defs[test_num]; + + env.test = test; + + test->run_test(); + + /* ensure last sub-test is finalized properly */ + if (test->subtest_name) + test__end_subtest(); + + test->tested = true; + + dump_test_log(test, test->error_cnt); + + reset_affinity(); + restore_netns(); + if (test->need_cgroup_cleanup) + cleanup_cgroup_environment(); +} + +struct dispatch_data { + int worker_id; + int sock_fd; +}; + +static void *dispatch_thread(void *ctx) +{ + struct dispatch_data *data = ctx; + int sock_fd; + FILE *log_fd = NULL; + + sock_fd = data->sock_fd; + + while (true) { + int test_to_run = -1; + struct prog_test_def *test; + struct test_result *result; + + /* grab a test */ + { + pthread_mutex_lock(¤t_test_lock); + + if (current_test_idx >= prog_test_cnt) { + pthread_mutex_unlock(¤t_test_lock); + goto done; + } + + test = &prog_test_defs[current_test_idx]; + test_to_run = current_test_idx; + current_test_idx++; + + pthread_mutex_unlock(¤t_test_lock); + } + + if (!test->should_run) + continue; + + /* run test through worker */ + { + struct msg msg_do_test; + + msg_do_test.type = MSG_DO_TEST; + msg_do_test.do_test.test_num = test_to_run; + if (send_message(sock_fd, &msg_do_test) < 0) { + perror("Fail to send command"); + goto done; + } + env.worker_current_test[data->worker_id] = test_to_run; + } + + /* wait for test done */ + { + int err; + struct msg msg_test_done; + + err = recv_message(sock_fd, &msg_test_done); + if (err < 0) + goto error; + if (msg_test_done.type != MSG_TEST_DONE) + goto error; + if (test_to_run != msg_test_done.test_done.test_num) + goto error; + + test->tested = true; + result = &test_results[test_to_run]; + + result->error_cnt = msg_test_done.test_done.error_cnt; + result->skip_cnt = msg_test_done.test_done.skip_cnt; + result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + + /* collect all logs */ + if (msg_test_done.test_done.have_log) { + log_fd = open_memstream(&result->log_buf, &result->log_cnt); + if (!log_fd) + goto error; + + while (true) { + struct msg msg_log; + + if (recv_message(sock_fd, &msg_log) < 0) + goto error; + if (msg_log.type != MSG_TEST_LOG) + goto error; + + fprintf(log_fd, "%s", msg_log.test_log.log_buf); + if (msg_log.test_log.is_last) + break; + } + fclose(log_fd); + log_fd = NULL; + } + /* output log */ + { + pthread_mutex_lock(&stdout_output_lock); + + if (result->log_cnt) { + result->log_buf[result->log_cnt] = '\0'; + fprintf(stdout, "%s", result->log_buf); + if (result->log_buf[result->log_cnt - 1] != '\n') + fprintf(stdout, "\n"); + } + + fprintf(stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + result->error_cnt ? "FAIL" : (result->skip_cnt ? 
"SKIP" : "OK")); + + pthread_mutex_unlock(&stdout_output_lock); + } + + } /* wait for test done */ + } /* while (true) */ +error: + if (env.debug) + fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); + + if (log_fd) + fclose(log_fd); +done: + { + struct msg msg_exit; + + msg_exit.type = MSG_EXIT; + if (send_message(sock_fd, &msg_exit) < 0) { + if (env.debug) + fprintf(stderr, "[%d]: send_message msg_exit: %s.\n", + data->worker_id, strerror(errno)); + } + } + return NULL; +} + +static void print_all_error_logs(void) +{ + int i; + + if (env.fail_cnt) + fprintf(stdout, "\nAll error logs:\n"); + + /* print error logs again */ + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test; + struct test_result *result; + + test = &prog_test_defs[i]; + result = &test_results[i]; + + if (!test->tested || !result->error_cnt) + continue; + + fprintf(stdout, "\n#%d %s:%s\n", + test->test_num, test->test_name, + result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); + + if (result->log_cnt) { + result->log_buf[result->log_cnt] = '\0'; + fprintf(stdout, "%s", result->log_buf); + if (result->log_buf[result->log_cnt - 1] != '\n') + fprintf(stdout, "\n"); + } + } +} + +static int server_main(void) +{ + pthread_t *dispatcher_threads; + struct dispatch_data *data; + struct sigaction sigact_int = { + .sa_handler = sigint_handler, + .sa_flags = SA_RESETHAND, + }; + int i; + + sigaction(SIGINT, &sigact_int, NULL); + + dispatcher_threads = calloc(sizeof(pthread_t), env.workers); + data = calloc(sizeof(struct dispatch_data), env.workers); + + env.worker_current_test = calloc(sizeof(int), env.workers); + for (i = 0; i < env.workers; i++) { + int rc; + + data[i].worker_id = i; + data[i].sock_fd = env.worker_socks[i]; + rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]); + if (rc < 0) { + perror("Failed to launch dispatcher thread"); + exit(EXIT_ERR_SETUP_INFRA); + } + } + + /* wait for all dispatcher to finish */ + for (i = 0; i < env.workers; i++) { + while (true) { + int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL); + + if (!ret) { + break; + } else if (ret == EBUSY) { + if (env.debug) + fprintf(stderr, "Still waiting for thread %d (test %d).\n", + i, env.worker_current_test[i] + 1); + usleep(1000 * 1000); + continue; + } else { + fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret); + break; + } + } + } + free(dispatcher_threads); + free(env.worker_current_test); + free(data); + + /* generate summary */ + fflush(stderr); + fflush(stdout); + + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *current_test; + struct test_result *result; + + current_test = &prog_test_defs[i]; + result = &test_results[i]; + + if (!current_test->tested) + continue; + + env.succ_cnt += result->error_cnt ? 
0 : 1; + env.skip_cnt += result->skip_cnt; + if (result->error_cnt) + env.fail_cnt++; + env.sub_succ_cnt += result->sub_succ_cnt; + } + + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + + print_all_error_logs(); + + /* reap all workers */ + for (i = 0; i < env.workers; i++) { + int wstatus, pid; + + pid = waitpid(env.worker_pids[i], &wstatus, 0); + if (pid != env.worker_pids[i]) + perror("Unable to reap worker"); + } + + return 0; +} + +static int worker_main(int sock) +{ + save_netns(); + + while (true) { + /* receive command */ + struct msg msg; + + if (recv_message(sock, &msg) < 0) + goto out; + + switch (msg.type) { + case MSG_EXIT: + if (env.debug) + fprintf(stderr, "[%d]: worker exit.\n", + env.worker_id); + goto out; + case MSG_DO_TEST: { + int test_to_run; + struct prog_test_def *test; + struct msg msg_done; + + test_to_run = msg.do_test.test_num; + test = &prog_test_defs[test_to_run]; + + if (env.debug) + fprintf(stderr, "[%d]: #%d:%s running.\n", + env.worker_id, + test_to_run + 1, + test->test_name); + + stdio_hijack(); + + run_one_test(test_to_run); + + stdio_restore(); + + memset(&msg_done, 0, sizeof(msg_done)); + msg_done.type = MSG_TEST_DONE; + msg_done.test_done.test_num = test_to_run; + msg_done.test_done.error_cnt = test->error_cnt; + msg_done.test_done.skip_cnt = test->skip_cnt; + msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; + msg_done.test_done.have_log = false; + + if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { + if (env.log_cnt) + msg_done.test_done.have_log = true; + } + if (send_message(sock, &msg_done) < 0) { + perror("Fail to send message done"); + goto out; + } + + /* send logs */ + if (msg_done.test_done.have_log) { + char *src; + size_t slen; + + src = env.log_buf; + slen = env.log_cnt; + while (slen) { + struct msg msg_log; + char *dest; + size_t len; + + memset(&msg_log, 0, sizeof(msg_log)); + msg_log.type = MSG_TEST_LOG; + dest = msg_log.test_log.log_buf; + len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; + memcpy(dest, src, len); + + src += len; + slen -= len; + if (!slen) + msg_log.test_log.is_last = true; + + assert(send_message(sock, &msg_log) >= 0); + } + } + if (env.log_buf) { + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + if (env.debug) + fprintf(stderr, "[%d]: #%d:%s done.\n", + env.worker_id, + test_to_run + 1, + test->test_name); + break; + } /* case MSG_DO_TEST */ + default: + if (env.debug) + fprintf(stderr, "[%d]: unknown message.\n", env.worker_id); + return -1; + } + } +out: + return 0; +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -809,7 +1286,7 @@ int main(int argc, char **argv) struct sigaction sigact = { .sa_handler = crash_handler, .sa_flags = SA_RESETHAND, - }; + }; int err, i; sigaction(SIGSEGV, &sigact, NULL); @@ -837,21 +1314,77 @@ int main(int argc, char **argv) return -1; } - save_netns(); - stdio_hijack(); + env.stdout = stdout; + env.stderr = stderr; + env.has_testmod = true; if (!env.list_test_names && load_bpf_testmod()) { fprintf(env.stderr, "WARNING! 
Selftests relying on bpf_testmod.ko will be skipped.\n"); env.has_testmod = false; } + + /* initializing tests */ for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - env.test = test; test->test_num = i + 1; - - if (!should_run(&env.test_selector, + if (should_run(&env.test_selector, test->test_num, test->test_name)) + test->should_run = true; + else + test->should_run = false; + } + + /* ignore workers if we are just listing */ + if (env.get_test_cnt || env.list_test_names) + env.workers = 0; + + /* launch workers if requested */ + env.worker_id = -1; /* main process */ + if (env.workers) { + env.worker_pids = calloc(sizeof(__pid_t), env.workers); + env.worker_socks = calloc(sizeof(int), env.workers); + if (env.debug) + fprintf(stdout, "Launching %d workers.\n", env.workers); + for (i = 0; i < env.workers; i++) { + int sv[2]; + pid_t pid; + + if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) { + perror("Fail to create worker socket"); + return -1; + } + pid = fork(); + if (pid < 0) { + perror("Failed to fork worker"); + return -1; + } else if (pid != 0) { /* main process */ + close(sv[1]); + env.worker_pids[i] = pid; + env.worker_socks[i] = sv[0]; + } else { /* inside each worker process */ + close(sv[0]); + env.worker_id = i; + return worker_main(sv[1]); + } + } + + if (env.worker_id == -1) { + server_main(); + goto out; + } + } + + /* The rest of the main process */ + + /* on single mode */ + save_netns(); + + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_result *result; + + if (!test->should_run) continue; if (env.get_test_cnt) { @@ -865,33 +1398,35 @@ int main(int argc, char **argv) continue; } - test->run_test(); - /* ensure last sub-test is finalized properly */ - if (test->subtest_name) - test__end_subtest(); + stdio_hijack(); - test->tested = true; + run_one_test(i); - dump_test_log(test, test->error_cnt); + stdio_restore(); fprintf(env.stdout, "#%d %s:%s\n", test->test_num, test->test_name, test->error_cnt ? "FAIL" : (test->skip_cnt ? 
"SKIP" : "OK")); + result = &test_results[i]; + result->error_cnt = test->error_cnt; + if (env.log_buf) { + result->log_buf = strdup(env.log_buf); + result->log_cnt = env.log_cnt; + + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + if (test->error_cnt) env.fail_cnt++; else env.succ_cnt++; - skip_account(); - reset_affinity(); - restore_netns(); - if (test->need_cgroup_cleanup) - cleanup_cgroup_environment(); + skip_account(); + env.sub_succ_cnt += test->sub_succ_cnt; } - if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); - stdio_restore(); if (env.get_test_cnt) { printf("%d\n", env.succ_cnt); @@ -904,14 +1439,18 @@ int main(int argc, char **argv) fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + print_all_error_logs(); + + close(env.saved_netns_fd); out: + if (!env.list_test_names && env.has_testmod) + unload_bpf_testmod(); free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); - close(env.saved_netns_fd); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 9b8a1810b700a..93c1ff7055334 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -62,6 +62,7 @@ struct test_env { struct test_selector test_selector; struct test_selector subtest_selector; bool verifier_stats; + bool debug; enum verbosity verbosity; bool jit_enabled; @@ -69,7 +70,8 @@ struct test_env { bool get_test_cnt; bool list_test_names; - struct prog_test_def *test; + struct prog_test_def *test; /* current running tests */ + FILE *stdout; FILE *stderr; char *log_buf; @@ -82,6 +84,38 @@ struct test_env { int skip_cnt; /* skipped tests */ int saved_netns_fd; + int workers; /* number of worker process */ + int worker_id; /* id number of current worker, main process is -1 */ + pid_t *worker_pids; /* array of worker pids */ + int *worker_socks; /* array of worker socks */ + int *worker_current_test; /* array of current running test for each worker */ +}; + +#define MAX_LOG_TRUNK_SIZE 8192 +enum msg_type { + MSG_DO_TEST = 0, + MSG_TEST_DONE = 1, + MSG_TEST_LOG = 2, + MSG_EXIT = 255, +}; +struct msg { + enum msg_type type; + union { + struct { + int test_num; + } do_test; + struct { + int test_num; + int sub_succ_cnt; + int error_cnt; + int skip_cnt; + bool have_log; + } test_done; + struct { + char log_buf[MAX_LOG_TRUNK_SIZE + 1]; + bool is_last; + } test_log; + }; }; extern struct test_env env; From 6587ff58cea4a7f252b8bbc4031a1bf4ec3781d8 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:07 -0700 Subject: [PATCH 052/181] selftests/bpf: Allow some tests to be executed in sequence This patch allows tests to define serial_test_name() instead of test_name(), and this will make test_progs execute those in sequence after all other tests finished executing concurrently. 
Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-3-fallentree@fb.com --- tools/testing/selftests/bpf/test_progs.c | 60 +++++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 20dd1e2f8d063..1f4a485669917 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -50,6 +50,7 @@ struct prog_test_def { const char *test_name; int test_num; void (*run_test)(void); + void (*run_serial_test)(void); bool force_log; int error_cnt; int skip_cnt; @@ -457,14 +458,17 @@ static int load_bpf_testmod(void) } /* extern declarations for test funcs */ -#define DEFINE_TEST(name) extern void test_##name(void); +#define DEFINE_TEST(name) \ + extern void test_##name(void) __weak; \ + extern void serial_test_##name(void) __weak; #include #undef DEFINE_TEST static struct prog_test_def prog_test_defs[] = { -#define DEFINE_TEST(name) { \ - .test_name = #name, \ - .run_test = &test_##name, \ +#define DEFINE_TEST(name) { \ + .test_name = #name, \ + .run_test = &test_##name, \ + .run_serial_test = &serial_test_##name, \ }, #include #undef DEFINE_TEST @@ -907,7 +911,10 @@ static void run_one_test(int test_num) env.test = test; - test->run_test(); + if (test->run_test) + test->run_test(); + else if (test->run_serial_test) + test->run_serial_test(); /* ensure last sub-test is finalized properly */ if (test->subtest_name) @@ -957,7 +964,7 @@ static void *dispatch_thread(void *ctx) pthread_mutex_unlock(¤t_test_lock); } - if (!test->should_run) + if (!test->should_run || test->run_serial_test) continue; /* run test through worker */ @@ -1136,6 +1143,40 @@ static int server_main(void) free(env.worker_current_test); free(data); + /* run serial tests */ + save_netns(); + + for (int i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_result *result = &test_results[i]; + + if (!test->should_run || !test->run_serial_test) + continue; + + stdio_hijack(); + + run_one_test(i); + + stdio_restore(); + if (env.log_buf) { + result->log_cnt = env.log_cnt; + result->log_buf = strdup(env.log_buf); + + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + restore_netns(); + + fprintf(stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + + result->error_cnt = test->error_cnt; + result->skip_cnt = test->skip_cnt; + result->sub_succ_cnt = test->sub_succ_cnt; + } + /* generate summary */ fflush(stderr); fflush(stdout); @@ -1333,6 +1374,13 @@ int main(int argc, char **argv) test->should_run = true; else test->should_run = false; + + if ((test->run_test == NULL && test->run_serial_test == NULL) || + (test->run_test != NULL && test->run_serial_test != NULL)) { + fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n", + test->test_num, test->test_name, test->test_name, test->test_name); + exit(EXIT_ERR_SETUP_INFRA); + } } /* ignore workers if we are just listing */ From e87c3434f81ae566693cfdc22370dc938b2989dd Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:09 -0700 Subject: [PATCH 053/181] selftests/bpf: Add per worker cgroup suffix This patch make each worker use a unique cgroup base directory, thus allowing tests that uses cgroups to run concurrently. 
Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-5-fallentree@fb.com --- tools/testing/selftests/bpf/cgroup_helpers.c | 6 +++--- tools/testing/selftests/bpf/cgroup_helpers.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f3daa44a82660..8fcd44841bb28 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "cgroup_helpers.h" @@ -33,10 +34,9 @@ #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" - #define format_cgroup_path(buf, path) \ - snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ - CGROUP_WORK_DIR, path) + snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ + CGROUP_WORK_DIR, getpid(), path) #define format_classid_path(buf) \ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 629da3854b3e7..fcc9cb91b2111 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,4 +26,4 @@ int join_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -#endif /* __CGROUP_HELPERS_H */ +#endif /* __CGROUP_HELPERS_H */ \ No newline at end of file From d719de0d2f3cbdb5890a147b90a51c5eaaef103e Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:11 -0700 Subject: [PATCH 054/181] selftests/bpf: Fix race condition in enable_stats In parallel execution mode, this test now need to use atomic operation to avoid race condition. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-7-fallentree@fb.com --- tools/testing/selftests/bpf/progs/test_enable_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c index 01a002ade5292..1705097d01d7b 100644 --- a/tools/testing/selftests/bpf/progs/test_enable_stats.c +++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c @@ -13,6 +13,6 @@ __u64 count = 0; SEC("raw_tracepoint/sys_enter") int test_enable_stats(void *ctx) { - count += 1; + __sync_fetch_and_add(&count, 1); return 0; } From 445e72c782a1f770440e229afeb0c4e386da943c Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:12 -0700 Subject: [PATCH 055/181] selftests/bpf: Make cgroup_v1v2 use its own port This patch change cgroup_v1v2 use a different port, avoid conflict with other tests. 
Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-8-fallentree@fb.com --- tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c | 2 +- tools/testing/selftests/bpf/progs/connect4_dropper.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index ab3b9bc5e6d1c..9026b42914d31 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -46,7 +46,7 @@ void test_cgroup_v1v2(void) { struct network_helper_opts opts = {}; int server_fd, client_fd, cgroup_fd; - static const int port = 60123; + static const int port = 60120; /* Step 1: Check base connectivity works without any BPF. */ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c index b565d997810ad..d3f4c5e4fb696 100644 --- a/tools/testing/selftests/bpf/progs/connect4_dropper.c +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -18,7 +18,7 @@ int connect_v4_dropper(struct bpf_sock_addr *ctx) { if (ctx->type != SOCK_STREAM) return VERDICT_PROCEED; - if (ctx->user_port == bpf_htons(60123)) + if (ctx->user_port == bpf_htons(60120)) return VERDICT_REJECT; return VERDICT_PROCEED; } From 0f4feacc9155776fd2c7f1c7bcb41001d94990a0 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:15 -0700 Subject: [PATCH 056/181] selftests/bpf: Adding pid filtering for atomics test This makes the atomics test able to run in parallel with other tests. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-11-fallentree@fb.com --- tools/testing/selftests/bpf/prog_tests/atomics.c | 1 + tools/testing/selftests/bpf/progs/atomics.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index ba0e1efe5a45f..1486be5d3209f 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -225,6 +225,7 @@ void test_atomics(void) test__skip(); goto cleanup; } + skel->bss->pid = getpid(); if (test__start_subtest("add")) test_add(skel); diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c index c245345e41ca0..16e57313204af 100644 --- a/tools/testing/selftests/bpf/progs/atomics.c +++ b/tools/testing/selftests/bpf/progs/atomics.c @@ -10,6 +10,8 @@ bool skip_tests __attribute((__section__(".data"))) = false; bool skip_tests = true; #endif +__u32 pid = 0; + __u64 add64_value = 1; __u64 add64_result = 0; __u32 add32_value = 1; @@ -21,6 +23,8 @@ __u64 add_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(add, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 add_stack_value = 1; @@ -45,6 +49,8 @@ __s64 sub_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(sub, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 sub_stack_value = 1; @@ -67,6 +73,8 @@ __u64 and_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(and, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS and64_result =
__sync_fetch_and_and(&and64_value, 0x011ull << 32); @@ -86,6 +94,8 @@ __u64 or_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(or, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32); or32_result = __sync_fetch_and_or(&or32_value, 0x011); @@ -104,6 +114,8 @@ __u64 xor_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(xor, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); @@ -123,6 +135,8 @@ __u32 cmpxchg32_result_succeed = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(cmpxchg, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); @@ -142,6 +156,8 @@ __u32 xchg32_result = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(xchg, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 val64 = 2; __u32 val32 = 2; From 5db02dd7f09fdc32fa3866386784d383aca191d8 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:17 -0700 Subject: [PATCH 057/181] selftests/bpf: Fix pid check in fexit_sleep test bpf_get_current_pid_tgid() returns u64, whose upper 32 bits are the same as the userspace getpid() return value. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-13-fallentree@fb.com --- tools/testing/selftests/bpf/progs/fexit_sleep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c index 03a672d76353a..bca92c9bd29a1 100644 --- a/tools/testing/selftests/bpf/progs/fexit_sleep.c +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -13,7 +13,7 @@ int fexit_cnt = 0; SEC("fentry/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) { - if ((int)bpf_get_current_pid_tgid() != pid) + if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; fentry_cnt++; @@ -23,7 +23,7 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) SEC("fexit/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) { - if ((int)bpf_get_current_pid_tgid() != pid) + if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; fexit_cnt++; From d3f7b1664d3ebd69751327f45f5cd4adfb29f620 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Wed, 6 Oct 2021 11:56:19 -0700 Subject: [PATCH 058/181] selftests/bpf: Make some tests serial Change tests that often fail in parallel execution mode to serial.
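For reference, opting a test into serial mode is purely a rename of its entry point; the weak-symbol table added in the test_progs.c change earlier in this series picks up whichever symbol a test defines. A sketch with a hypothetical test name:

/* A test defines exactly one of test_##name() or serial_test_##name().
 * The former may run on a worker process in parallel mode; the latter is
 * run by the main process after all parallel tests have finished.
 */
void serial_test_foo(void)
{
	/* same body the old test_foo() had; only the symbol name changes */
}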
Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211006185619.364369-15-fallentree@fb.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c | 2 +- tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c | 2 +- .../selftests/bpf/prog_tests/cgroup_attach_autodetach.c | 2 +- tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c | 2 +- .../testing/selftests/bpf/prog_tests/cgroup_attach_override.c | 2 +- tools/testing/selftests/bpf/prog_tests/cgroup_link.c | 2 +- tools/testing/selftests/bpf/prog_tests/check_mtu.c | 2 +- tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 3 ++- .../selftests/bpf/prog_tests/flow_dissector_load_bytes.c | 2 +- .../testing/selftests/bpf/prog_tests/flow_dissector_reattach.c | 2 +- tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c | 2 +- tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 3 ++- tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c | 2 +- tools/testing/selftests/bpf/prog_tests/modify_return.c | 3 ++- tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c | 3 ++- tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 2 +- tools/testing/selftests/bpf/prog_tests/perf_link.c | 3 ++- tools/testing/selftests/bpf/prog_tests/probe_user.c | 3 ++- .../selftests/bpf/prog_tests/raw_tp_writable_test_run.c | 3 ++- tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 2 +- .../selftests/bpf/prog_tests/send_signal_sched_switch.c | 3 ++- tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c | 2 +- tools/testing/selftests/bpf/prog_tests/snprintf_btf.c | 2 +- tools/testing/selftests/bpf/prog_tests/sock_fields.c | 2 +- tools/testing/selftests/bpf/prog_tests/sockmap_listen.c | 2 +- tools/testing/selftests/bpf/prog_tests/timer.c | 3 ++- tools/testing/selftests/bpf/prog_tests/timer_mim.c | 2 +- tools/testing/selftests/bpf/prog_tests/tp_attach_query.c | 2 +- tools/testing/selftests/bpf/prog_tests/trace_printk.c | 2 +- tools/testing/selftests/bpf/prog_tests/trace_vprintk.c | 2 +- tools/testing/selftests/bpf/prog_tests/trampoline_count.c | 3 ++- tools/testing/selftests/bpf/prog_tests/xdp_attach.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_bonding.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_info.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_link.c | 2 +- 38 files changed, 48 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c index 85babb0487b31..b52ff8ce34db8 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -179,7 +179,7 @@ static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel, free_fds(est_fds, nr_est); } -void test_bpf_iter_setsockopt(void) +void serial_test_bpf_iter_setsockopt(void) { struct bpf_iter_setsockopt *iter_skel = NULL; struct bpf_cubic *cubic_skel = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 284d5921c3458..eb8eeebe69359 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -3,7 +3,7 @@ #define nr_iters 2 -void test_bpf_obj_id(void) +void serial_test_bpf_obj_id(void) { 
const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 876be0ecb654f..621c572221918 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -363,7 +363,7 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) cg_storage_multi_shared__destroy(obj); } -void test_cg_storage_multi(void) +void serial_test_cg_storage_multi(void) { int parent_cgroup_fd = -1, child_cgroup_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 70e94e783070b..5de485c7370fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -21,7 +21,7 @@ static int prog_load(void) bpf_log_buf, BPF_LOG_BUF_SIZE); } -void test_cgroup_attach_autodetach(void) +void serial_test_cgroup_attach_autodetach(void) { __u32 duration = 0, prog_cnt = 4, attach_flags; int allow_prog[2] = {-1}; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 20bb8831dda67..731bea84d8edd 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val) return ret; } -void test_cgroup_attach_multi(void) +void serial_test_cgroup_attach_multi(void) { __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 9e96f8d87fea4..10d3c33821a7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -23,7 +23,7 @@ static int prog_load(int verdict) bpf_log_buf, BPF_LOG_BUF_SIZE); } -void test_cgroup_attach_override(void) +void serial_test_cgroup_attach_override(void) { int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1; __u32 duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 9091524131d65..9e6e6aad347c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -24,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls) return 0; } -void test_cgroup_link(void) +void serial_test_cgroup_link(void) { struct { const char *path; diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 012068f33a0a8..f73e6e36b74dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -195,7 +195,7 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) test_check_mtu__destroy(skel); } -void test_check_mtu(void) +void serial_test_check_mtu(void) { __u32 mtu_lo; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 2839f4270a26e..9cff14a23bb72 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ 
b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -380,7 +380,8 @@ static void test_func_map_prog_compatibility(void) "./test_attach_probe.o"); } -void test_fexit_bpf2bpf(void) +/* NOTE: affect other tests, must run in serial mode */ +void serial_test_fexit_bpf2bpf(void) { if (test__start_subtest("target_no_callees")) test_target_no_callees(); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 0e8a4d2f023d0..6093728497c70 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -2,7 +2,7 @@ #include #include -void test_flow_dissector_load_bytes(void) +void serial_test_flow_dissector_load_bytes(void) { struct bpf_flow_keys flow_keys; __u32 duration = 0, retval, size; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 3931ede5c5340..f0c6c226aba8c 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -628,7 +628,7 @@ static void run_tests(int netns) } } -void test_flow_dissector_reattach(void) +void serial_test_flow_dissector_reattach(void) { int err, new_net, saved_net; diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c index e4f92feb7b32c..d6d70a359aeb5 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -73,7 +73,7 @@ static void close_perf_events(void) free(pfd_array); } -void test_get_branch_snapshot(void) +void serial_test_get_branch_snapshot(void) { struct get_branch_snapshot *skel = NULL; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index ddfb6bf971524..032a322d51f2a 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -48,7 +48,8 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) *(bool *)ctx = true; } -void test_kfree_skb(void) +/* TODO: fix kernel panic caused by this test in parallel mode */ +void serial_test_kfree_skb(void) { struct __sk_buff skb = {}; struct bpf_prog_test_run_attr tattr = { diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index 59adb4715394f..7589c03fd26be 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -541,7 +541,7 @@ static void run_test(struct migrate_reuseport_test_case *test_case, } } -void test_migrate_reuseport(void) +void serial_test_migrate_reuseport(void) { struct test_migrate_reuseport *skel; int i; diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index 97fec70c600be..b772fe30ce9bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -53,7 +53,8 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) modify_return__destroy(skel); } -void test_modify_return(void) +/* TODO: conflict with get_func_ip_test */ +void serial_test_modify_return(void) { 
run_test(0 /* input_retval */, 1 /* want_side_effect */, diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 2535788e135f0..24d493482ffc7 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -78,7 +78,8 @@ static void test_ns_current_pid_tgid_new_ns(void) return; } -void test_ns_current_pid_tgid(void) +/* TODO: use a different tracepoint */ +void serial_test_ns_current_pid_tgid(void) { if (test__start_subtest("ns_current_pid_tgid_root_ns")) test_current_pid_tgid(NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 6490e9673002f..6979aff4aab22 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -43,7 +43,7 @@ int trigger_on_cpu(int cpu) return 0; } -void test_perf_buffer(void) +void serial_test_perf_buffer(void) { int err, on_len, nr_on_cpus = 0, nr_cpus, i; struct perf_buffer_opts pb_opts = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index b1abd0c466073..ede07344f264e 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -23,7 +23,8 @@ static void burn_cpu(void) ++j; } -void test_perf_link(void) +/* TODO: often fails in concurrent mode */ +void serial_test_perf_link(void) { struct test_perf_link *skel = NULL; struct perf_event_attr attr; diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 52fe157e2a902..abf890d066eb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -void test_probe_user(void) +/* TODO: corrupts other tests uses connect() */ +void serial_test_probe_user(void) { const char *prog_name = "handle_sys_connect"; const char *obj_file = "./test_probe_user.o"; diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c index 5c45424cac5f8..ddefa1192e5d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -3,7 +3,8 @@ #include #include -void test_raw_tp_writable_test_run(void) +/* NOTE: conflict with other tests. 
*/ +void serial_test_raw_tp_writable_test_run(void) { __u32 duration = 0; char error[4096]; diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index d40e9156c48d1..3cfc910ab3c1b 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -858,7 +858,7 @@ void test_map_type(enum bpf_map_type mt) cleanup(); } -void test_select_reuseport(void) +void serial_test_select_reuseport(void) { saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); if (saved_tcp_fo < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c index 189a34a7addbd..15dacfcfaa6dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c @@ -25,7 +25,8 @@ static void *worker(void *p) return NULL; } -void test_send_signal_sched_switch(void) +/* NOTE: cause events loss */ +void serial_test_send_signal_sched_switch(void) { struct test_send_signal_kern *skel; pthread_t threads[THREAD_COUNT]; diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c index 2b392590e8ca4..547ae53cde746 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c @@ -105,7 +105,7 @@ static void do_test(void) close(listen_fd); } -void test_sk_storage_tracing(void) +void serial_test_sk_storage_tracing(void) { struct test_sk_storage_trace_itself *skel_itself; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c index 76e1f5fe18fac..dd41b826be309 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -6,7 +6,7 @@ /* Demonstrate that bpf_snprintf_btf succeeds and that various data types * are formatted correctly. 
*/ -void test_snprintf_btf(void) +void serial_test_snprintf_btf(void) { struct netif_receive_skb *skel; struct netif_receive_skb__bss *bss; diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 577d619fb07ed..fae40db4d81f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -329,7 +329,7 @@ static void test(void) close(listen_fd); } -void test_sock_fields(void) +void serial_test_sock_fields(void) { struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 5c5979046523e..102c73a00402b 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -2037,7 +2037,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_udp_unix_redir(skel, map, family); } -void test_sockmap_listen(void) +void serial_test_sockmap_listen(void) { struct test_sockmap_listen *skel; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 25f40e1b99670..0f4e49e622cdf 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -39,7 +39,8 @@ static int timer(struct timer *timer_skel) return 0; } -void test_timer(void) +/* TODO: use pid filtering */ +void serial_test_timer(void) { struct timer *timer_skel = NULL; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index ced8f6cf347cd..949a0617869df 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -52,7 +52,7 @@ static int timer_mim(struct timer_mim *timer_skel) return 0; } -void test_timer_mim(void) +void serial_test_timer_mim(void) { struct timer_mim_reject *timer_reject_skel = NULL; libbpf_print_fn_t old_print_fn = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index fb095e5cd9afd..8652d0a46c87a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -void test_tp_attach_query(void) +void serial_test_tp_attach_query(void) { const int num_progs = 3; int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index e47835f0a6748..3f7a7141265ee 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -8,7 +8,7 @@ #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" -void test_trace_printk(void) +void serial_test_trace_printk(void) { int err = 0, iter = 0, found = 0; struct trace_printk__bss *bss; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c index 61a24e62e1a0f..46101270cb1ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -8,7 +8,7 @@ #define TRACEBUF 
"/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "1,2,3,4,5,6,7,8,9,10" -void test_trace_vprintk(void) +void serial_test_trace_vprintk(void) { int err = 0, iter = 0, found = 0; struct trace_vprintk__bss *bss; diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index d7f5a931d7f38..fc146671b20a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -41,7 +41,8 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name) return bpf_program__attach_trace(prog); } -void test_trampoline_count(void) +/* TODO: use different target function to run in concurrent mode */ +void serial_test_trampoline_count(void) { const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index 15ef3531483ef..4c4057262cd8d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void test_xdp_attach(void) +void serial_test_xdp_attach(void) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index ad3ba81b40480..faa22b84f2eeb 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -519,7 +519,7 @@ static struct bond_test_case bond_test_cases[] = { { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, }; -void test_xdp_bonding(void) +void serial_test_xdp_bonding(void) { libbpf_print_fn_t old_print_fn; struct skeletons skeletons = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 8755effd80b0b..fd812bd43600a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -7,7 +7,7 @@ #define IFINDEX_LO 1 -void test_xdp_cpumap_attach(void) +void serial_test_xdp_cpumap_attach(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index c72af030ff101..d4e9a9972a67e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -72,7 +72,7 @@ void test_neg_xdp_devmap_helpers(void) } -void test_xdp_devmap_attach(void) +void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) test_xdp_with_devmap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index d2d7a283d72f9..4e2a4fd56f675 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 -void test_xdp_info(void) +void serial_test_xdp_info(void) { __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id; const char *file = "./xdp_dummy.o"; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c 
b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 46eed0a33c23a..983ab0b47d30a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -6,7 +6,7 @@ #define IFINDEX_LO 1 -void test_xdp_link(void) +void serial_test_xdp_link(void) { __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); From 5319255b8df9271474bc9027cabf82253934f28d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Oct 2021 19:33:28 +0200 Subject: [PATCH 059/181] selftests/bpf: Skip verifier tests that fail to load with ENOTSUPP The verifier tests added in commit c48e51c8b07a ("bpf: selftests: Add selftests for module kfunc support") fail on s390, since the JIT does not support calling kernel functions. This is most likely an issue for all the other non-Intel arches, as well as on Intel with !CONFIG_DEBUG_INFO_BTF or !CONFIG_BPF_JIT. Trying to check for messages from all the possible add_kfunc_call() failure cases in test_verifier looks pointless, so do a much simpler thing instead: just like it's already done in do_prog_test_run(), skip the tests that fail to load with ENOTSUPP. Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211007173329.381754-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/test_verifier.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3a9e332c5e360..25afe423b3f06 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -47,6 +47,10 @@ #include "test_btf.h" #include "../../../include/linux/filter.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 @@ -974,7 +978,7 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, if (err) { switch (saved_errno) { - case 524/*ENOTSUPP*/: + case ENOTSUPP: printf("Did not run the program (not supported) "); return 0; case EPERM: @@ -1119,6 +1123,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto close_fds; } + if (fd_prog < 0 && saved_errno == ENOTSUPP) { + printf("SKIP (program uses an unsupported feature)\n"); + skips++; + goto close_fds; + } + alignment_prevented_execution = 0; if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { From 307d149d9435e5514268d6020d430245813d82cc Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 11 Oct 2021 09:19:09 +0800 Subject: [PATCH 060/181] bpf, mips: Clean up config options about JIT The config options MIPS_CBPF_JIT and MIPS_EBPF_JIT are useless, remove them in arch/mips/Kconfig, and then modify arch/mips/net/Makefile. Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Acked-by: Johan Almbladh Link: https://lore.kernel.org/bpf/1633915150-13220-2-git-send-email-yangtiezhu@loongson.cn --- arch/mips/Kconfig | 9 --------- arch/mips/net/Makefile | 6 +++--- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 38468f47aa5e9..9b03c789cb27b 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1214,15 +1214,6 @@ config SYS_SUPPORTS_RELOCATABLE The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF to allow access to command line and entropy sources. 
-config MIPS_CBPF_JIT - def_bool y - depends on BPF_JIT && HAVE_CBPF_JIT - -config MIPS_EBPF_JIT - def_bool y - depends on BPF_JIT && HAVE_EBPF_JIT - - # # Endianness selection. Sufficiently obscure so many users don't know what to # answer,so we try hard to limit the available choices. Also the use of a diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 95e826781dbc5..e3e6ae6514e84 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code -obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o ifeq ($(CONFIG_32BIT),y) - obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o else - obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o endif From 431bfb9ee3e2ac0294d5ef58cee87683807299c0 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 11 Oct 2021 09:19:10 +0800 Subject: [PATCH 061/181] bpf, mips: Fix comment on tail call count limiting In emit_tail_call() of bpf_jit_comp32.c, "blez t2" (t2 <= 0) is not consistent with the comment "t2 < 0", update the comment to keep consistency. Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Acked-by: Johan Almbladh Link: https://lore.kernel.org/bpf/1633915150-13220-3-git-send-email-yangtiezhu@loongson.cn --- arch/mips/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c index 9d7041a2e5d73..bd996ede12f8e 100644 --- a/arch/mips/net/bpf_jit_comp32.c +++ b/arch/mips/net/bpf_jit_comp32.c @@ -1315,7 +1315,7 @@ static int emit_tail_call(struct jit_context *ctx) /* if (TCC-- <= 0) goto out */ emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */ emit_load_delay(ctx); /* Load delay slot */ - emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 < 0 */ + emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 <= 0 */ emit(ctx, addiu, t2, t2, -1); /* t2-- (delay slot) */ emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */ From 223f903e9c832699f4e5f422281a60756c1c6cfe Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 Oct 2021 09:48:38 -0700 Subject: [PATCH 062/181] bpf: Rename BTF_KIND_TAG to BTF_KIND_DECL_TAG Patch set [1] introduced BTF_KIND_TAG to allow tagging declarations for struct/union, struct/union field, var, func and func arguments and these tags will be encoded into dwarf. They are also encoded to btf by llvm for the bpf target. After BTF_KIND_TAG is introduced, we intended to use it for kernel __user attributes. But kernel __user is actually a type attribute. Upstream and internal discussion showed it is not a good idea to mix declaration attribute and type attribute. So we proposed to introduce btf_type_tag as a type attribute and existing btf_tag renamed to btf_decl_tag ([2]). This patch renamed BTF_KIND_TAG to BTF_KIND_DECL_TAG and some other declarations with *_tag to *_decl_tag to make it clear the tag is for declaration. In the future, BTF_KIND_TYPE_TAG might be introduced per [3]. 
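As a reminder of what is actually being renamed, the attribute is attached to declarations in C roughly as below. This is only a sketch with made-up names; it needs a Clang with btf_decl_tag support (the README update later in this patch points at Clang 14). Links [1], [2] and [3] below give the full history.

/* Declaration tags can annotate variables, struct/union members, functions
 * and function parameters; Clang encodes each one as a BTF_KIND_DECL_TAG
 * entry referring back to the tagged declaration.
 */
#define __tag(x) __attribute__((btf_decl_tag(x)))

struct pkt_ctx {
	void *data __tag("ctx_data");		/* tag on a struct member */
	int len;
};

int g_counter __tag("global_counter");		/* tag on a variable */

int process(struct pkt_ctx *c __tag("arg_ctx")) __tag("entry_fn");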
[1] https://lore.kernel.org/bpf/20210914223004.244411-1-yhs@fb.com/ [2] https://reviews.llvm.org/D111588 [3] https://reviews.llvm.org/D111199 Fixes: b5ea834dde6b ("bpf: Support for new btf kind BTF_KIND_TAG") Fixes: 5b84bd10363e ("libbpf: Add support for BTF_KIND_TAG") Fixes: 5c07f2fec003 ("bpftool: Add support for BTF_KIND_TAG") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211012164838.3345699-1-yhs@fb.com --- Documentation/bpf/btf.rst | 24 +-- include/uapi/linux/btf.h | 8 +- kernel/bpf/btf.c | 44 ++--- tools/bpf/bpftool/btf.c | 6 +- tools/include/uapi/linux/btf.h | 8 +- tools/lib/bpf/btf.c | 36 ++-- tools/lib/bpf/btf.h | 12 +- tools/lib/bpf/btf_dump.c | 6 +- tools/lib/bpf/libbpf.c | 24 +-- tools/lib/bpf/libbpf.map | 2 +- tools/lib/bpf/libbpf_internal.h | 4 +- tools/testing/selftests/bpf/README.rst | 4 +- tools/testing/selftests/bpf/btf_helpers.c | 8 +- tools/testing/selftests/bpf/prog_tests/btf.c | 160 +++++++++--------- .../selftests/bpf/prog_tests/btf_write.c | 30 ++-- tools/testing/selftests/bpf/progs/tag.c | 6 +- tools/testing/selftests/bpf/test_btf.h | 4 +- 17 files changed, 193 insertions(+), 193 deletions(-) diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 1bfe4072f5fca..9e5b4a98af76b 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -85,7 +85,7 @@ sequentially and type id is assigned to each recognized type starting from id #define BTF_KIND_VAR 14 /* Variable */ #define BTF_KIND_DATASEC 15 /* Section */ #define BTF_KIND_FLOAT 16 /* Floating point */ - #define BTF_KIND_TAG 17 /* Tag */ + #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ Note that the type section encodes debug info, not just pure types. ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram. @@ -107,7 +107,7 @@ Each type contains the following common data:: * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and TAG. + * FUNC, FUNC_PROTO and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -466,30 +466,30 @@ map definition. No additional type data follow ``btf_type``. -2.2.17 BTF_KIND_TAG -~~~~~~~~~~~~~~~~~~~ +2.2.17 BTF_KIND_DECL_TAG +~~~~~~~~~~~~~~~~~~~~~~~~ ``struct btf_type`` encoding requirement: * ``name_off``: offset to a non-empty string * ``info.kind_flag``: 0 - * ``info.kind``: BTF_KIND_TAG + * ``info.kind``: BTF_KIND_DECL_TAG * ``info.vlen``: 0 * ``type``: ``struct``, ``union``, ``func`` or ``var`` -``btf_type`` is followed by ``struct btf_tag``.:: +``btf_type`` is followed by ``struct btf_decl_tag``.:: - struct btf_tag { + struct btf_decl_tag { __u32 component_idx; }; -The ``name_off`` encodes btf_tag attribute string. +The ``name_off`` encodes btf_decl_tag attribute string. The ``type`` should be ``struct``, ``union``, ``func`` or ``var``. -For ``var`` type, ``btf_tag.component_idx`` must be ``-1``. -For the other three types, if the btf_tag attribute is +For ``var`` type, ``btf_decl_tag.component_idx`` must be ``-1``. +For the other three types, if the btf_decl_tag attribute is applied to the ``struct``, ``union`` or ``func`` itself, -``btf_tag.component_idx`` must be ``-1``. Otherwise, +``btf_decl_tag.component_idx`` must be ``-1``. 
Otherwise, the attribute is applied to a ``struct``/``union`` member or -a ``func`` argument, and ``btf_tag.component_idx`` should be a +a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a valid index (starting from 0) pointing to a member or an argument. 3. BTF Kernel API diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 642b6ecb37d7e..deb12f755f0fa 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and TAG. + * FUNC, FUNC_PROTO, VAR and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -74,7 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ - BTF_KIND_TAG = 17, /* Tag */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -174,14 +174,14 @@ struct btf_var_secinfo { __u32 size; }; -/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe * additional information related to the tag applied location. * If component_idx == -1, the tag is applied to a struct, union, * variable or function. Otherwise, it is applied to a struct/union * member or a func argument, and component_idx indicates which member * or argument (0 ... vlen-1). */ -struct btf_tag { +struct btf_decl_tag { __s32 component_idx; }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2ebffb9f57ebb..9059053088b90 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -281,7 +281,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; const char *btf_type_str(const struct btf_type *t) @@ -460,12 +460,12 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static bool btf_type_is_tag(const struct btf_type *t) +static bool btf_type_is_decl_tag(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_TAG; + return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } -static bool btf_type_is_tag_target(const struct btf_type *t) +static bool btf_type_is_decl_tag_target(const struct btf_type *t) { return btf_type_is_func(t) || btf_type_is_struct(t) || btf_type_is_var(t); @@ -549,7 +549,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, static bool btf_type_is_resolve_source_only(const struct btf_type *t) { return btf_type_is_var(t) || - btf_type_is_tag(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -576,7 +576,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || - btf_type_is_tag(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -630,9 +630,9 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_tag *btf_type_tag(const struct btf_type *t) +static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) { - return (const struct btf_tag *)(t + 1); + return (const struct btf_decl_tag *)(t + 1); } static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) @@ -3820,11 +3820,11 @@ static const struct 
btf_kind_operations float_ops = { .show = btf_df_show, }; -static s32 btf_tag_check_meta(struct btf_verifier_env *env, +static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { - const struct btf_tag *tag; + const struct btf_decl_tag *tag; u32 meta_needed = sizeof(*tag); s32 component_idx; const char *value; @@ -3852,7 +3852,7 @@ static s32 btf_tag_check_meta(struct btf_verifier_env *env, return -EINVAL; } - component_idx = btf_type_tag(t)->component_idx; + component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx < -1) { btf_verifier_log_type(env, t, "Invalid component_idx"); return -EINVAL; @@ -3863,7 +3863,7 @@ static s32 btf_tag_check_meta(struct btf_verifier_env *env, return meta_needed; } -static int btf_tag_resolve(struct btf_verifier_env *env, +static int btf_decl_tag_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *next_type; @@ -3874,7 +3874,7 @@ static int btf_tag_resolve(struct btf_verifier_env *env, u32 vlen; next_type = btf_type_by_id(btf, next_type_id); - if (!next_type || !btf_type_is_tag_target(next_type)) { + if (!next_type || !btf_type_is_decl_tag_target(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@ -3883,7 +3883,7 @@ static int btf_tag_resolve(struct btf_verifier_env *env, !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); - component_idx = btf_type_tag(t)->component_idx; + component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx != -1) { if (btf_type_is_var(next_type)) { btf_verifier_log_type(env, v->t, "Invalid component_idx"); @@ -3909,18 +3909,18 @@ static int btf_tag_resolve(struct btf_verifier_env *env, return 0; } -static void btf_tag_log(struct btf_verifier_env *env, const struct btf_type *t) +static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "type=%u component_idx=%d", t->type, - btf_type_tag(t)->component_idx); + btf_type_decl_tag(t)->component_idx); } -static const struct btf_kind_operations tag_ops = { - .check_meta = btf_tag_check_meta, - .resolve = btf_tag_resolve, +static const struct btf_kind_operations decl_tag_ops = { + .check_meta = btf_decl_tag_check_meta, + .resolve = btf_decl_tag_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, - .log_details = btf_tag_log, + .log_details = btf_decl_tag_log, .show = btf_df_show, }; @@ -4058,7 +4058,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_VAR] = &var_ops, [BTF_KIND_DATASEC] = &datasec_ops, [BTF_KIND_FLOAT] = &float_ops, - [BTF_KIND_TAG] = &tag_ops, + [BTF_KIND_DECL_TAG] = &decl_tag_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -4143,7 +4143,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_tag(t)) + if (btf_type_is_decl_tag(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 49743ad968513..7b68d4f65fe62 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -37,7 +37,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; struct btf_attach_table { @@ 
-348,8 +348,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf(" size=%u", t->size); break; } - case BTF_KIND_TAG: { - const struct btf_tag *tag = (const void *)(t + 1); + case BTF_KIND_DECL_TAG: { + const struct btf_decl_tag *tag = (const void *)(t + 1); if (json_output) { jsonw_uint_field(w, "type_id", t->type); diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 642b6ecb37d7e..deb12f755f0fa 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and TAG. + * FUNC, FUNC_PROTO, VAR and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -74,7 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ - BTF_KIND_TAG = 17, /* Tag */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -174,14 +174,14 @@ struct btf_var_secinfo { __u32 size; }; -/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe * additional information related to the tag applied location. * If component_idx == -1, the tag is applied to a struct, union, * variable or function. Otherwise, it is applied to a struct/union * member or a func argument, and component_idx indicates which member * or argument (0 ... vlen-1). */ -struct btf_tag { +struct btf_decl_tag { __s32 component_idx; }; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 60fbd1c6d466d..1f6dea11f6004 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -309,8 +309,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); - case BTF_KIND_TAG: - return base_size + sizeof(struct btf_tag); + case BTF_KIND_DECL_TAG: + return base_size + sizeof(struct btf_decl_tag); default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -383,8 +383,8 @@ static int btf_bswap_type_rest(struct btf_type *t) v->size = bswap_32(v->size); } return 0; - case BTF_KIND_TAG: - btf_tag(t)->component_idx = bswap_32(btf_tag(t)->component_idx); + case BTF_KIND_DECL_TAG: + btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx); return 0; default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); @@ -596,7 +596,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_VAR: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -2569,7 +2569,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ } /* - * Append new BTF_KIND_TAG type with: + * Append new BTF_KIND_DECL_TAG type with: * - *value* - non-empty/non-NULL string; * - *ref_type_id* - referenced type ID, it might not exist yet; * - *component_idx* - -1 for tagging reference type, otherwise struct/union @@ -2578,7 +2578,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ * - >0, type ID of newly added BTF type; * - <0, on error. 
*/ -int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, +int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, int component_idx) { struct btf_type *t; @@ -2593,7 +2593,7 @@ int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, if (btf_ensure_modifiable(btf)) return libbpf_err(-ENOMEM); - sz = sizeof(struct btf_type) + sizeof(struct btf_tag); + sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); t = btf_add_type_mem(btf, sz); if (!t) return libbpf_err(-ENOMEM); @@ -2603,9 +2603,9 @@ int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, return value_off; t->name_off = value_off; - t->info = btf_type_info(BTF_KIND_TAG, 0, false); + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); t->type = ref_type_id; - btf_tag(t)->component_idx = component_idx; + btf_decl_tag(t)->component_idx = component_idx; return btf_commit_type(btf, sz); } @@ -3427,7 +3427,7 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) } /* Calculate type signature hash of INT or TAG. */ -static long btf_hash_int_tag(struct btf_type *t) +static long btf_hash_int_decl_tag(struct btf_type *t) { __u32 info = *(__u32 *)(t + 1); long h; @@ -3705,8 +3705,8 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_common(t); break; case BTF_KIND_INT: - case BTF_KIND_TAG: - h = btf_hash_int_tag(t); + case BTF_KIND_DECL_TAG: + h = btf_hash_int_decl_tag(t); break; case BTF_KIND_ENUM: h = btf_hash_enum(t); @@ -3761,11 +3761,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FUNC_PROTO: case BTF_KIND_VAR: case BTF_KIND_DATASEC: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: return 0; case BTF_KIND_INT: - h = btf_hash_int_tag(t); + h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); @@ -4382,13 +4382,13 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) } break; - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); if (ref_type_id < 0) return ref_type_id; t->type = ref_type_id; - h = btf_hash_int_tag(t); + h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); @@ -4671,7 +4671,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 864eb51753a1a..4011e206e6f7f 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -236,7 +236,7 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz); /* tag construction API */ -LIBBPF_API int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, +LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, int component_idx); struct btf_dedup_opts { @@ -426,9 +426,9 @@ static inline bool btf_is_float(const struct btf_type *t) return btf_kind(t) == BTF_KIND_FLOAT; } -static inline bool btf_is_tag(const struct btf_type *t) +static inline bool btf_is_decl_tag(const struct btf_type *t) { - return btf_kind(t) == BTF_KIND_TAG; + return btf_kind(t) == BTF_KIND_DECL_TAG; } static inline __u8 btf_int_encoding(const struct btf_type *t) @@ -499,10 +499,10 @@ btf_var_secinfos(const 
struct btf_type *t) return (struct btf_var_secinfo *)(t + 1); } -struct btf_tag; -static inline struct btf_tag *btf_tag(const struct btf_type *t) +struct btf_decl_tag; +static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) { - return (struct btf_tag *)(t + 1); + return (struct btf_decl_tag *)(t + 1); } #ifdef __cplusplus diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index ad6df97295ae2..5ef42f0abed12 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -316,7 +316,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: d->type_states[t->type].referenced = 1; break; @@ -584,7 +584,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DATASEC: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: d->type_states[id].order_state = ORDERED; return 0; @@ -2217,7 +2217,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: err = btf_dump_unsupported_data(d, t, id); break; case BTF_KIND_INT: diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ae0889bebe329..63d738654ff69 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -195,8 +195,8 @@ enum kern_feature_id { FEAT_BTF_FLOAT, /* BPF perf link support */ FEAT_PERF_LINK, - /* BTF_KIND_TAG support */ - FEAT_BTF_TAG, + /* BTF_KIND_DECL_TAG support */ + FEAT_BTF_DECL_TAG, __FEAT_CNT, }; @@ -2024,7 +2024,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; - case BTF_KIND_TAG: return "tag"; + case BTF_KIND_DECL_TAG: return "decl_tag"; default: return "unknown"; } } @@ -2524,9 +2524,9 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); - bool has_tag = kernel_supports(obj, FEAT_BTF_TAG); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); - return !has_func || !has_datasec || !has_func_global || !has_float || !has_tag; + return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2535,15 +2535,15 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); - bool has_tag = kernel_supports(obj, FEAT_BTF_TAG); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); struct btf_type *t; int i, j, vlen; for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - if ((!has_datasec && btf_is_var(t)) || (!has_tag && btf_is_tag(t))) { - /* replace VAR/TAG with INT */ + if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { + /* replace VAR/DECL_TAG with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* * using size = 1 is the safest choice, 4 will be too @@ -4248,7 +4248,7 @@ static int probe_kern_btf_float(void) strs, sizeof(strs))); } -static int probe_kern_btf_tag(void) +static int probe_kern_btf_decl_tag(void) { 
static const char strs[] = "\0tag"; __u32 types[] = { @@ -4258,7 +4258,7 @@ static int probe_kern_btf_tag(void) BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), BTF_VAR_STATIC, /* attr */ - BTF_TYPE_TAG_ENC(1, 2, -1), + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), @@ -4481,8 +4481,8 @@ static struct kern_feature_desc { [FEAT_PERF_LINK] = { "BPF perf link support", probe_perf_link, }, - [FEAT_BTF_TAG] = { - "BTF_KIND_TAG support", probe_kern_btf_tag, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, }, }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f270d25e4af3f..e6fb1ba493692 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -394,5 +394,5 @@ LIBBPF_0.6.0 { bpf_object__prev_map; bpf_object__prev_program; btf__add_btf; - btf__add_tag; + btf__add_decl_tag; } LIBBPF_0.5.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index f7fd3944d46d3..f6a5748dd3183 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -69,8 +69,8 @@ #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) -#define BTF_TYPE_TAG_ENC(value, type, component_idx) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx) +#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 554553acc6d94..5e287e445f756 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -204,7 +204,7 @@ __ https://reviews.llvm.org/D93563 btf_tag test and Clang version ============================== -The btf_tag selftest require LLVM support to recognize the btf_tag attribute. +The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute. It was introduced in `Clang 14`__. 
Without it, the btf_tag selftest will be skipped and you will observe: @@ -213,7 +213,7 @@ Without it, the btf_tag selftest will be skipped and you will observe: # btf_tag:SKIP -__ https://reviews.llvm.org/D106614 +__ https://reviews.llvm.org/D111588 Clang dependencies for static linking tests =========================================== diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index ce103fb0ad1b2..668cfa20bb1b9 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -24,12 +24,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TAG) + if (kind > BTF_KIND_DECL_TAG) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -178,9 +178,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) case BTF_KIND_FLOAT: fprintf(out, " size=%u", t->size); break; - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: fprintf(out, " type_id=%u component_idx=%d", - t->type, btf_tag(t)->component_idx); + t->type, btf_decl_tag(t)->component_idx); break; default: break; diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index acd33d0cd5d97..fa67f25bbef52 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3662,15 +3662,15 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "tag test #1, struct/member, well-formed", + .descr = "decl_tag test #1, struct/member, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_STRUCT_ENC(0, 2, 8), /* [2] */ BTF_MEMBER_ENC(NAME_TBD, 1, 0), BTF_MEMBER_ENC(NAME_TBD, 1, 32), - BTF_TAG_ENC(NAME_TBD, 2, -1), - BTF_TAG_ENC(NAME_TBD, 2, 0), - BTF_TAG_ENC(NAME_TBD, 2, 1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), BTF_END_RAW, }, BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"), @@ -3683,15 +3683,15 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #2, union/member, well-formed", + .descr = "decl_tag test #2, union/member, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_UNION_ENC(NAME_TBD, 2, 4), /* [2] */ BTF_MEMBER_ENC(NAME_TBD, 1, 0), BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_TAG_ENC(NAME_TBD, 2, -1), - BTF_TAG_ENC(NAME_TBD, 2, 0), - BTF_TAG_ENC(NAME_TBD, 2, 1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), BTF_END_RAW, }, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), @@ -3704,13 +3704,13 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #3, variable, well-formed", + .descr = "decl_tag test #3, variable, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ BTF_VAR_ENC(NAME_TBD, 1, 1), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 2, -1), - BTF_TAG_ENC(NAME_TBD, 3, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0global\0tag1\0tag2"), @@ -3723,16 +3723,16 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #4, func/parameter, well-formed", + .descr = "decl_tag test #4, func/parameter, 
well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 3, -1), - BTF_TAG_ENC(NAME_TBD, 3, 0), - BTF_TAG_ENC(NAME_TBD, 3, 1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 1), BTF_END_RAW, }, BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"), @@ -3745,11 +3745,11 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #5, invalid value", + .descr = "decl_tag test #5, invalid value", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TAG_ENC(0, 2, -1), + BTF_DECL_TAG_ENC(0, 2, -1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag"), @@ -3764,10 +3764,10 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid value", }, { - .descr = "tag test #6, invalid target type", + .descr = "decl_tag test #6, invalid target type", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TAG_ENC(NAME_TBD, 1, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 1, -1), BTF_END_RAW, }, BTF_STR_SEC("\0tag1"), @@ -3782,11 +3782,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid type", }, { - .descr = "tag test #7, invalid vlen", + .descr = "decl_tag test #7, invalid vlen", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TAG, 0, 1), 2), (0), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag1"), @@ -3801,11 +3801,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "vlen != 0", }, { - .descr = "tag test #8, invalid kflag", + .descr = "decl_tag test #8, invalid kflag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TAG, 1, 0), 2), (-1), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag1"), @@ -3820,11 +3820,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid btf_info kind_flag", }, { - .descr = "tag test #9, var, invalid component_idx", + .descr = "decl_tag test #9, var, invalid component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag"), @@ -3839,13 +3839,13 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { - .descr = "tag test #10, struct member, invalid component_idx", + .descr = "decl_tag test #10, struct member, invalid component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_STRUCT_ENC(0, 2, 8), /* [2] */ BTF_MEMBER_ENC(NAME_TBD, 1, 0), BTF_MEMBER_ENC(NAME_TBD, 1, 32), - BTF_TAG_ENC(NAME_TBD, 2, 2), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 2), BTF_END_RAW, }, BTF_STR_SEC("\0m1\0m2\0tag"), @@ -3860,14 +3860,14 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { - .descr = "tag test #11, func parameter, invalid component_idx", + .descr = "decl_tag test #11, func parameter, invalid component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_FUNC_PROTO_ENC(0, 2), 
/* [2] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 3, 2), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 2), BTF_END_RAW, }, BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), @@ -3882,14 +3882,14 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { - .descr = "tag test #12, < -1 component_idx", + .descr = "decl_tag test #12, < -1 component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 3, -2), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -2), BTF_END_RAW, }, BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), @@ -6672,9 +6672,9 @@ const struct btf_dedup_test dedup_tests[] = { /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ /* tag -> [3] struct s */ - BTF_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ /* tag -> [3] struct s, member 1 */ - BTF_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ /* full copy of the above */ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [9] */ @@ -6689,8 +6689,8 @@ const struct btf_dedup_test dedup_tests[] = { BTF_PTR_ENC(14), /* [13] */ BTF_CONST_ENC(9), /* [14] */ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [15] */ - BTF_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */ - BTF_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */ BTF_END_RAW, }, BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"), @@ -6714,8 +6714,8 @@ const struct btf_dedup_test dedup_tests[] = { BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [9] */ BTF_END_RAW, }, @@ -6841,8 +6841,8 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ - BTF_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ BTF_END_RAW, }, BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"), @@ -6869,8 +6869,8 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ - BTF_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ BTF_END_RAW, }, BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"), @@ -7036,14 +7036,14 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ /* tag -> t */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ /* tag -> func */ - BTF_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] 
*/ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */ /* tag -> func arg a1 */ - BTF_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */ - BTF_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), @@ -7056,9 +7056,9 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), @@ -7084,17 +7084,17 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_ENC(NAME_NTH(3), 4), /* [5] */ /* tag -> f: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */ /* tag -> f/a2: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */ - BTF_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */ /* tag -> f: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */ - BTF_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */ /* tag -> f/a2: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */ - BTF_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */ BTF_END_RAW, }, BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), @@ -7106,12 +7106,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */ - BTF_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */ - BTF_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */ - BTF_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */ BTF_END_RAW, }, BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), @@ -7133,17 +7133,17 @@ const struct btf_dedup_test dedup_tests[] = { BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), /* tag -> t: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ /* tag -> t/m2: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ /* tag -> t: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */ - BTF_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */ /* tag -> t/m2: 
tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */ - BTF_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), @@ -7154,12 +7154,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), - BTF_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */ - BTF_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), @@ -7202,8 +7202,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); - case BTF_KIND_TAG: - return base_size + sizeof(struct btf_tag); + case BTF_KIND_DECL_TAG: + return base_size + sizeof(struct btf_decl_tag); default: fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); return -EINVAL; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 886e0fc1efb12..b912eeb0b6b48 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -277,26 +277,26 @@ static void gen_btf(struct btf *btf) "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", "raw_dump"); - /* TAG */ - id = btf__add_tag(btf, "tag1", 16, -1); + /* DECL_TAG */ + id = btf__add_decl_tag(btf, "tag1", 16, -1); ASSERT_EQ(id, 18, "tag_id"); t = btf__type_by_id(btf, 18); ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_TAG, "tag_kind"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); ASSERT_EQ(t->type, 16, "tag_type"); - ASSERT_EQ(btf_tag(t)->component_idx, -1, "tag_component_idx"); + ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx"); ASSERT_STREQ(btf_type_raw_dump(btf, 18), - "[18] TAG 'tag1' type_id=16 component_idx=-1", "raw_dump"); + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump"); - id = btf__add_tag(btf, "tag2", 14, 1); + id = btf__add_decl_tag(btf, "tag2", 14, 1); ASSERT_EQ(id, 19, "tag_id"); t = btf__type_by_id(btf, 19); ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_TAG, "tag_kind"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); ASSERT_EQ(t->type, 14, "tag_type"); - ASSERT_EQ(btf_tag(t)->component_idx, 1, "tag_component_idx"); + ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx"); ASSERT_STREQ(btf_type_raw_dump(btf, 19), - "[19] TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); } static void test_btf_add() @@ -336,8 +336,8 @@ static void test_btf_add() "[16] VAR 'var1' type_id=1, linkage=global-alloc", "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", - "[18] TAG 'tag1' type_id=16 component_idx=-1", - "[19] TAG 'tag2' 
type_id=14 component_idx=1"); + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1"); btf__free(btf); } @@ -389,8 +389,8 @@ static void test_btf_add_btf() "[16] VAR 'var1' type_id=1, linkage=global-alloc", "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", - "[18] TAG 'tag1' type_id=16 component_idx=-1", - "[19] TAG 'tag2' type_id=14 component_idx=1", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", /* types appended from the second BTF */ "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", @@ -418,8 +418,8 @@ static void test_btf_add_btf() "[35] VAR 'var1' type_id=20, linkage=global-alloc", "[36] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=20 offset=4 size=8", - "[37] TAG 'tag1' type_id=35 component_idx=-1", - "[38] TAG 'tag2' type_id=33 component_idx=1"); + "[37] DECL_TAG 'tag1' type_id=35 component_idx=-1", + "[38] DECL_TAG 'tag2' type_id=33 component_idx=1"); cleanup: btf__free(btf1); diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/tag.c index b46b1bfac7da3..672d19e7b120b 100644 --- a/tools/testing/selftests/bpf/progs/tag.c +++ b/tools/testing/selftests/bpf/progs/tag.c @@ -8,9 +8,9 @@ #define __has_attribute(x) 0 #endif -#if __has_attribute(btf_tag) -#define __tag1 __attribute__((btf_tag("tag1"))) -#define __tag2 __attribute__((btf_tag("tag2"))) +#if __has_attribute(btf_decl_tag) +#define __tag1 __attribute__((btf_decl_tag("tag1"))) +#define __tag2 __attribute__((btf_decl_tag("tag2"))) volatile const bool skip_tests __tag1 __tag2 = false; #else #define __tag1 diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 0619e06d745e1..32c7a57867da2 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -69,7 +69,7 @@ #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) -#define BTF_TAG_ENC(value, type, component_idx) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx) +#define BTF_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) #endif /* _TEST_BTF_H */ From d51b6b2287ae0c5432f03457a1dbc844e1968d12 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sun, 10 Oct 2021 01:25:28 +0100 Subject: [PATCH 063/181] libbpf: Remove Makefile warnings on out-of-sync netlink.h/if_link.h Although relying on some definitions from the netlink.h and if_link.h headers copied into tools/include/uapi/linux/, libbpf does not need those headers to stay entirely up-to-date with their original versions, and the warnings emitted by the Makefile when it detects a difference are usually just noise. Let's remove those warnings. 
Suggested-by: Andrii Nakryiko Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211010002528.9772-1-quentin@isovalent.com --- tools/lib/bpf/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 9c6804ca5b451..b393b5e823804 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -146,12 +146,6 @@ $(BPF_IN_SHARED): force $(BPF_GENERATED) @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true - @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ - (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ - echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true - @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ - (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ - echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true From 34e3ab1447db13daad066bb9fcf76cb88a827da6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 9 Oct 2021 22:03:39 +0100 Subject: [PATCH 064/181] bpftool: Fix install for libbpf's internal header(s) We recently updated bpftool's Makefile to make it install the headers from libbpf, instead of pulling them directly from libbpf's directory. There is also an additional header, internal to libbpf, that needs to be installed. The way that bpftool's Makefile installs that particular header is currently correct, but would break if we were to modify $(LIBBPF_INTERNAL_HDRS) to make it point to more than one header. Use a static pattern rule instead, so that the Makefile can withstand the addition of other headers to install. The objective is simply to make the Makefile more robust. It should _not_ be read as an invitation to import more internal headers from libbpf into bpftool.
Fixes: f012ade10b34 ("bpftool: Install libbpf headers instead of including the dir") Reported-by: Andrii Nakryiko Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211009210341.6291-2-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 9c2d13c513f0e..646fd40b32533 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -14,7 +14,7 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) _OUTPUT := $(OUTPUT) @@ -25,6 +25,7 @@ BOOTSTRAP_OUTPUT := $(_OUTPUT)/bootstrap/ LIBBPF_OUTPUT := $(_OUTPUT)/libbpf/ LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include +LIBBPF_HDRS_DIR := $(LIBBPF_INCLUDE)/bpf LIBBPF = $(LIBBPF_OUTPUT)libbpf.a LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ @@ -32,7 +33,7 @@ LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a # We need to copy nlattr.h which is not otherwise exported by libbpf, but still # required by bpftool. -LIBBPF_INTERNAL_HDRS := nlattr.h +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,nlattr.h) ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) @@ -45,10 +46,9 @@ $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers -$(LIBBPF_INCLUDE)/bpf/$(LIBBPF_INTERNAL_HDRS): \ - $(addprefix $(BPF_DIR),$(LIBBPF_INTERNAL_HDRS)) $(LIBBPF) - $(call QUIET_INSTALL, bpf/$(notdir $@)) - $(Q)install -m 644 -t $(LIBBPF_INCLUDE)/bpf/ $(BPF_DIR)$(notdir $@) +$(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h $(LIBBPF) + $(call QUIET_INSTALL, $@) + $(Q)install -m 644 -t $(LIBBPF_HDRS_DIR) $< $(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ @@ -150,7 +150,7 @@ BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o g $(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -$(OBJS): $(LIBBPF) $(LIBBPF_INCLUDE)/bpf/$(LIBBPF_INTERNAL_HDRS) +$(OBJS): $(LIBBPF) $(LIBBPF_INTERNAL_HDRS) VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ From ced846c65e8ffdaac7138936cdbbc9337a939fb9 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 9 Oct 2021 22:03:40 +0100 Subject: [PATCH 065/181] bpftool: Do not FORCE-build libbpf In bpftool's Makefile, libbpf has a FORCE dependency, to make sure we rebuild it in case its source files changed. Let's instead make the rebuild depend on the source files directly, through a call to the "$(wildcard ...)" function. This avoids descending into libbpf's directory if there is nothing to update. Do the same for the bootstrap libbpf version. This results in a slightly faster operation and less verbose output when running make a second time in bpftool's directory. Before: Auto-detecting system features: ... libbfd: [ on ] ... disassembler-four-args: [ on ] ... zlib: [ on ] ... libcap: [ on ] ... clang-bpf-co-re: [ on ] make[1]: Entering directory '/root/dev/linux/tools/lib/bpf' make[1]: Entering directory '/root/dev/linux/tools/lib/bpf' make[1]: Nothing to be done for 'install_headers'. 
make[1]: Leaving directory '/root/dev/linux/tools/lib/bpf' make[1]: Leaving directory '/root/dev/linux/tools/lib/bpf' After: Auto-detecting system features: ... libbfd: [ on ] ... disassembler-four-args: [ on ] ... zlib: [ on ] ... libcap: [ on ] ... clang-bpf-co-re: [ on ] Other ways to clean up the output could be to pass the "-s" option, or to redirect the output to >/dev/null, when calling make recursively to descend into libbpf's directory. However, this would suppress some useful output if something goes wrong during the build. A better alternative would be to pass "--no-print-directory" to the recursive make, but that would still leave us with some noise for "install_headers". Skipping the descent into libbpf's directory if no source file has changed works best, and seems the most logical option overall. Reported-by: Andrii Nakryiko Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211009210341.6291-3-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 646fd40b32533..331019f6d5b16 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -42,7 +42,7 @@ endif $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): $(QUIET_MKDIR)mkdir -p $@ -$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) +$(LIBBPF): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers @@ -50,7 +50,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h $(LIBBPF) $(call QUIET_INSTALL, $@) $(Q)install -m 644 -t $(LIBBPF_HDRS_DIR) $< -$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) +$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ From 062e1fc008ded14a637ed9c8631fa31f57534dfc Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 9 Oct 2021 22:03:41 +0100 Subject: [PATCH 066/181] bpftool: Turn check on zlib from a phony target into a conditional error One of bpftool's object files depends on zlib. To make sure we do not attempt to build that object when the library is not available, commit d66fa3c70e59 ("tools: bpftool: add feature check for zlib") introduced a feature check to detect whether zlib is present. This check comes as a rule for which the target ("zdep") is a nonexistent file (phony target), which means that the Makefile always attempts to rebuild it. It is mostly harmless. However, one side effect is that, on running again once bpftool is already built, make considers that "something" (the recipe for zdep) was executed, and does not print the usual message "make: Nothing to be done for 'all'", which is a user-friendly indicator that the build went fine. Before, with some level of debugging information: $ make --debug=m [...] Reading makefiles... Auto-detecting system features: ... libbfd: [ on ] ... disassembler-four-args: [ on ] ... zlib: [ on ] ... libcap: [ on ] ... clang-bpf-co-re: [ on ] Updating makefiles.... Updating goal targets.... File 'all' does not exist. File 'zdep' does not exist. Must remake target 'zdep'. File 'all' does not exist. Must remake target 'all'. Successfully remade target file 'all'. After the patch: $ make --debug=m [...] Auto-detecting system features: ... 
libbfd: [ on ] ... disassembler-four-args: [ on ] ... zlib: [ on ] ... libcap: [ on ] ... clang-bpf-co-re: [ on ] Updating makefiles.... Updating goal targets.... File 'all' does not exist. Must remake target 'all'. Successfully remade target file 'all'. make: Nothing to be done for 'all'. (Note the last line, which is not part of make's debug information.) Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211009210341.6291-4-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 331019f6d5b16..abcef1f72d65b 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -198,7 +198,10 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -$(OUTPUT)feature.o: | zdep +$(OUTPUT)feature.o: +ifneq ($(feature-zlib), 1) + $(error "No zlib found") +endif $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ @@ -254,10 +257,7 @@ doc-uninstall: FORCE: -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - .SECONDARY: -.PHONY: all FORCE clean install-bin install uninstall zdep +.PHONY: all FORCE clean install-bin install uninstall .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all From 588cd7ef5382eb89d09a64ee9ba85e4f9a5b0bcb Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 9 Oct 2021 09:39:00 +0530 Subject: [PATCH 067/181] bpf: Silence Coverity warning for find_kfunc_desc_btf The helper function returns a pointer that in the failure case encodes an error in the struct btf pointer. The current code lead to Coverity warning about the use of the invalid pointer: *** CID 1507963: Memory - illegal accesses (USE_AFTER_FREE) /kernel/bpf/verifier.c: 1788 in find_kfunc_desc_btf() 1782 return ERR_PTR(-EINVAL); 1783 } 1784 1785 kfunc_btf = __find_kfunc_desc_btf(env, offset, btf_modp); 1786 if (IS_ERR_OR_NULL(kfunc_btf)) { 1787 verbose(env, "cannot find module BTF for func_id %u\n", func_id); >>> CID 1507963: Memory - illegal accesses (USE_AFTER_FREE) >>> Using freed pointer "kfunc_btf". 1788 return kfunc_btf ?: ERR_PTR(-ENOENT); 1789 } 1790 return kfunc_btf; 1791 } 1792 return btf_vmlinux ?: ERR_PTR(-ENOENT); 1793 } Daniel suggested the use of ERR_CAST so that the intended use is clear to Coverity, but on closer look it seems that we never return NULL from the helper. Andrii noted that since __find_kfunc_desc_btf already logs errors for all cases except btf_get_by_fd, it is much easier to add logging for that and remove the IS_ERR check altogether, returning directly from it. 
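For reference, a minimal sketch of the ERR_PTR convention at play here, with made-up names rather than code from this patch: a single pointer return value carries either a valid object or a negative errno, so a plain IS_ERR() check is sufficient once the callee is known to never return NULL.

  #include <linux/err.h>
  #include <linux/errno.h>

  struct foo { int val; };
  static struct foo some_foo;

  /* Producer: success yields an ordinary pointer, failure an encoded errno. */
  static struct foo *foo_lookup(int id)
  {
  	if (id < 0)
  		return ERR_PTR(-EINVAL);
  	return &some_foo;
  }

  /* Consumer: foo_lookup() never returns NULL, so IS_ERR() is enough; an
   * extra IS_ERR_OR_NULL() plus "?: ERR_PTR(-ENOENT)" fallback is exactly
   * the kind of redundancy that confused Coverity above.
   */
  static int foo_use(int id)
  {
  	struct foo *f = foo_lookup(id);

  	if (IS_ERR(f))
  		return PTR_ERR(f);
  	return f->val;
  }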
Suggested-by: Andrii Nakryiko Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211009040900.803436-1-memxor@gmail.com --- kernel/bpf/verifier.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20900a1bac12f..21cdff35a2f9e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1727,8 +1727,10 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, return ERR_PTR(-EFAULT); btf = btf_get_by_fd(btf_fd); - if (IS_ERR(btf)) + if (IS_ERR(btf)) { + verbose(env, "invalid module BTF fd specified\n"); return btf; + } if (!btf_is_module(btf)) { verbose(env, "BTF fd for kfunc is not a module BTF\n"); @@ -1771,8 +1773,6 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, u32 func_id, s16 offset, struct module **btf_modp) { - struct btf *kfunc_btf; - if (offset) { if (offset < 0) { /* In the future, this can be allowed to increase limit @@ -1782,12 +1782,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, return ERR_PTR(-EINVAL); } - kfunc_btf = __find_kfunc_desc_btf(env, offset, btf_modp); - if (IS_ERR_OR_NULL(kfunc_btf)) { - verbose(env, "cannot find module BTF for func_id %u\n", func_id); - return kfunc_btf ?: ERR_PTR(-ENOENT); - } - return kfunc_btf; + return __find_kfunc_desc_btf(env, offset, btf_modp); } return btf_vmlinux ?: ERR_PTR(-ENOENT); } From ebc7b50a3849d73665013573cf3c09f27fb14fde Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 11 Oct 2021 01:20:28 -0700 Subject: [PATCH 068/181] libbpf: Migrate internal use of bpf_program__get_prog_info_linear In preparation for bpf_program__get_prog_info_linear deprecation, move the single use in libbpf to call bpf_obj_get_info_by_fd directly. 
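As a rough userspace illustration of the replacement pattern (assuming a valid prog_fd and a libbpf recent enough to provide btf__load_from_kernel_by_id(); the function name below is illustrative, not taken from libbpf.c):

  #include <stdio.h>
  #include <bpf/bpf.h>
  #include <bpf/btf.h>
  #include <bpf/libbpf.h>

  /* Print the BTF id attached to an already-loaded BPF program. */
  static int dump_prog_btf_id(int prog_fd)
  {
  	struct bpf_prog_info info = {};
  	__u32 info_len = sizeof(info);
  	struct btf *btf;
  	int err;

  	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
  	if (err)
  		return err;
  	if (!info.btf_id)
  		return -1;	/* program was loaded without BTF */

  	btf = btf__load_from_kernel_by_id(info.btf_id);
  	err = libbpf_get_error(btf);
  	if (err)
  		return err;

  	printf("prog BTF id %u holds %u types\n",
  	       info.btf_id, btf__get_nr_types(btf));
  	btf__free(btf);
  	return 0;
  }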
Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211011082031.4148337-2-davemarchevsky@fb.com --- tools/lib/bpf/libbpf.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 63d738654ff69..760c7e3466038 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8461,28 +8461,27 @@ int libbpf_find_vmlinux_btf_id(const char *name, static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info *info; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); struct btf *btf; int err; - info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); - err = libbpf_get_error(info_linear); + err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed get_prog_info_linear for FD %d\n", - attach_prog_fd); + pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", + attach_prog_fd, err); return err; } err = -EINVAL; - info = &info_linear->info; - if (!info->btf_id) { + if (!info.btf_id) { pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { - pr_warn("Failed to get BTF of the program\n"); + btf = btf__load_from_kernel_by_id(info.btf_id); + err = libbpf_get_error(btf); + if (err) { + pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -8492,7 +8491,6 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) goto out; } out: - free(info_linear); return err; } From 5f52d47c5f75a955ae29c59bed2ca886faba0ff4 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 20 Oct 2021 10:46:47 +0100 Subject: [PATCH 069/181] bpf/preload: Clean up .gitignore and "clean-files" target kernel/bpf/preload/Makefile was recently updated to have it install libbpf's headers locally instead of pulling them from tools/lib/bpf. But two items still need to be addressed. First, the local .gitignore file was not adjusted to ignore the files generated in the new kernel/bpf/preload/libbpf output directory. Second, the "clean-files" target is now incorrect. The old artefacts names were not removed from the target, while the new ones were added incorrectly. This is because "clean-files" expects names relative to $(obj), but we passed the absolute path instead. This results in the output and header-destination directories for libbpf (and their contents) not being removed from kernel/bpf/preload on "make clean" from the root of the repository. This commit fixes both issues. Note that $(userprogs) needs not be added to "clean-files", because the cleaning infrastructure already accounts for it. Cleaning the files properly also prevents make from printing the following message, for builds coming after a "make clean": "make[4]: Nothing to be done for 'install_headers'." v2: Simplify the "clean-files" target. 
Fixes: bf60791741d4 ("bpf: preload: Install libbpf headers when building") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211020094647.15564-1-quentin@isovalent.com --- kernel/bpf/preload/.gitignore | 4 +--- kernel/bpf/preload/Makefile | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/preload/.gitignore b/kernel/bpf/preload/.gitignore index 856a4c5ad0dd5..9452322902a54 100644 --- a/kernel/bpf/preload/.gitignore +++ b/kernel/bpf/preload/.gitignore @@ -1,4 +1,2 @@ -/FEATURE-DUMP.libbpf -/bpf_helper_defs.h -/feature +/libbpf /bpf_preload_umd diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 469d35e890ebc..1400ac58178e8 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -27,8 +27,7 @@ userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ userprogs := bpf_preload_umd -clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/ -clean-files += $(LIBBPF_OUT) $(LIBBPF_DESTDIR) +clean-files := libbpf/ $(obj)/iterators/iterators.o: | libbpf_hdrs From b8f49dce799f0c177ee6a56b3fd67e7fdc6e68c2 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 12 Oct 2021 10:32:30 +0800 Subject: [PATCH 070/181] selftests/bpf: Remove duplicated include in cgroup_helpers Fix following checkincludes.pl warning: ./scripts/checkincludes.pl tools/testing/selftests/bpf/cgroup_helpers.c tools/testing/selftests/bpf/cgroup_helpers.c: unistd.h is included more than once. Signed-off-by: Wan Jiabing Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211012023231.19911-1-wanjiabing@vivo.com --- tools/testing/selftests/bpf/cgroup_helpers.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 8fcd44841bb28..9d59c3990ca8d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "cgroup_helpers.h" From efc36d6c642a753c2880d97b8e8c3c7cfaf64ad1 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 20 Oct 2021 10:48:26 +0100 Subject: [PATCH 071/181] bpftool: Remove useless #include to from map_perf_ring.c The header is no longer needed since the event_pipe implementation was updated to rely on libbpf's perf_buffer. This makes bpftool free of dependencies to perf files, and we can update the Makefile accordingly. 
Fixes: 9b190f185d2f ("tools/bpftool: switch map event_pipe to libbpf's perf_buffer") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211020094826.16046-1-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 3 +-- tools/bpf/bpftool/map_perf_ring.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index abcef1f72d65b..098d762e111a7 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -73,8 +73,7 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(LIBBPF_INCLUDE) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ - -I$(srctree)/tools/include/uapi \ - -I$(srctree)/tools/perf + -I$(srctree)/tools/include/uapi CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 825f29f93a57a..b98ea702d2849 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -22,7 +22,6 @@ #include #include -#include #include "main.h" From b599015f044df53e93ad0a2957b615bc1a26bf73 Mon Sep 17 00:00:00 2001 From: David Yang Date: Tue, 12 Oct 2021 19:16:49 +0800 Subject: [PATCH 072/181] samples/bpf: Fix application of sizeof to pointer The coccinelle check report: "./samples/bpf/xdp_redirect_cpu_user.c:397:32-38: ERROR: application of sizeof to pointer" Using the "strlen" to fix it. Reported-by: Zeal Robot Signed-off-by: David Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211012111649.983253-1-davidcomponentone@gmail.com --- samples/bpf/xdp_redirect_cpu_user.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 6e25fba64c72b..d84e6949007cc 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -325,7 +325,6 @@ int main(int argc, char **argv) int add_cpu = -1; int ifindex = -1; int *cpu, i, opt; - char *ifname; __u32 qsize; int n_cpus; @@ -393,9 +392,8 @@ int main(int argc, char **argv) fprintf(stderr, "-d/--dev name too long\n"); goto end_cpu; } - ifname = (char *)&ifname_buf; - safe_strncpy(ifname, optarg, sizeof(ifname)); - ifindex = if_nametoindex(ifname); + safe_strncpy(ifname_buf, optarg, strlen(ifname_buf)); + ifindex = if_nametoindex(ifname_buf); if (!ifindex) ifindex = strtoul(optarg, NULL, 0); if (!ifindex) { From b16d12f3900283e00aded9131ba1e9b2880513c3 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 13 Oct 2021 18:08:59 +0200 Subject: [PATCH 073/181] selftests/bpf: Use cpu_number only on arches that have it cpu_number exists only on Intel and aarch64, so skip the test involing it on other arches. An alternative would be to replace it with an exported non-ifdefed primitive-typed percpu variable from the common code, but there appears to be none. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211013160902.428340-2-iii@linux.ibm.com --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 87f9df653e4e2..12f457b6786dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -778,8 +778,10 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, char *str) { +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); +#endif TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, "static int cpu_profile_flip = (int)2", 2); } From c9e982b879465ca74e3593ce82808aa259265a71 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 13 Oct 2021 18:09:00 +0200 Subject: [PATCH 074/181] libbpf: Fix dumping big-endian bitfields On big-endian arches not only bytes, but also bits are numbered in reverse order (see e.g. S/390 ELF ABI Supplement, but this is also true for other big-endian arches as well). Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211013160902.428340-3-iii@linux.ibm.com --- tools/lib/bpf/btf_dump.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 5ef42f0abed12..679bf34e3f47f 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1562,29 +1562,28 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, __u64 *value) { __u16 left_shift_bits, right_shift_bits; - __u8 nr_copy_bits, nr_copy_bytes; const __u8 *bytes = data; - int sz = t->size; + __u8 nr_copy_bits; __u64 num = 0; int i; /* Maximum supported bitfield size is 64 bits */ - if (sz > 8) { - pr_warn("unexpected bitfield size %d\n", sz); + if (t->size > 8) { + pr_warn("unexpected bitfield size %d\n", t->size); return -EINVAL; } /* Bitfield value retrieval is done in two steps; first relevant bytes are * stored in num, then we left/right shift num to eliminate irrelevant bits. */ - nr_copy_bits = bit_sz + bits_offset; - nr_copy_bytes = t->size; #if __BYTE_ORDER == __LITTLE_ENDIAN - for (i = nr_copy_bytes - 1; i >= 0; i--) + for (i = t->size - 1; i >= 0; i--) num = num * 256 + bytes[i]; + nr_copy_bits = bit_sz + bits_offset; #elif __BYTE_ORDER == __BIG_ENDIAN - for (i = 0; i < nr_copy_bytes; i++) + for (i = 0; i < t->size; i++) num = num * 256 + bytes[i]; + nr_copy_bits = t->size * 8 - bits_offset; #else # error "Unrecognized __BYTE_ORDER__" #endif From 961632d5416389a6f2e7478b674ac645c7e3ff01 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 13 Oct 2021 18:09:01 +0200 Subject: [PATCH 075/181] libbpf: Fix dumping non-aligned __int128 Non-aligned integers are dumped as bitfields, which is supported for at most 64-bit integers. Fix by using the same trick as btf_dump_float_data(): copy non-aligned values to the local buffer. 
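The trick is easy to demonstrate outside of libbpf; here is a standalone C sketch of copying through a naturally aligned local buffer before reading, scaled down to a 64-bit value for portability instead of __int128:

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  /* memcpy() into a properly aligned local avoids the unaligned loads that
   * some platforms trap on; this mirrors the aligned local-buffer copy added
   * above, only with a smaller type for illustration.
   */
  static uint64_t read_u64_unaligned(const void *data)
  {
  	uint64_t buf;

  	memcpy(&buf, data, sizeof(buf));
  	return buf;
  }

  int main(void)
  {
  	unsigned char raw[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };

  	/* raw + 1 is not suitably aligned for a direct 64-bit load */
  	printf("0x%016llx\n", (unsigned long long)read_u64_unaligned(raw + 1));
  	return 0;
  }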
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211013160902.428340-4-iii@linux.ibm.com --- tools/lib/bpf/btf_dump.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 679bf34e3f47f..e98c201f29b51 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1670,9 +1670,10 @@ static int btf_dump_int_data(struct btf_dump *d, { __u8 encoding = btf_int_encoding(t); bool sign = encoding & BTF_INT_SIGNED; + char buf[16] __aligned(16); int sz = t->size; - if (sz == 0) { + if (sz == 0 || sz > sizeof(buf)) { pr_warn("unexpected size %d for id [%u]\n", sz, type_id); return -EINVAL; } @@ -1680,8 +1681,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* handle packed int data - accesses of integers not aligned on * int boundaries can cause problems on some platforms. */ - if (!ptr_is_aligned(data, sz)) - return btf_dump_bitfield_data(d, t, data, 0, 0); + if (!ptr_is_aligned(data, sz)) { + memcpy(buf, data, sz); + data = buf; + } switch (sz) { case 16: { From 7960d02dddccc0676d4a1b58e9964718b47388e1 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Fri, 15 Oct 2021 09:33:18 +0000 Subject: [PATCH 076/181] selftests/bpf: Some more atomic tests Some new verifier tests that hit some important gaps in the parameter space for atomic ops. There are already exhaustive tests for the JIT part in lib/test_bpf.c, but these exercise the verifier too. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211015093318.1273686-1-jackmanb@google.com --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 38 +++++++++++++ .../selftests/bpf/verifier/atomic_fetch.c | 57 +++++++++++++++++++ .../selftests/bpf/verifier/atomic_invalid.c | 25 ++++++++ 3 files changed, 120 insertions(+) create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_invalid.c diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 6e52dfc644153..c22dc83a41fdc 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -119,3 +119,41 @@ }, .result = ACCEPT, }, +{ + "Dest pointer in r0 - fail", + .insns = { + /* val = 0; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, 1); */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (r0 != 0) exit(1); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Dest pointer in r0 - succeed", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r0 = atomic_cmpxchg(&val, r0, 0); */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c new file mode 100644 index 0000000000000..3bc9ff7a860b7 --- /dev/null +++ 
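For context, BPF C source that generates this kind of atomic fetch instruction typically looks like the sketch below (assuming clang -target bpf -mcpu=v3; the section, variable and function names are illustrative, not taken from the selftests):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  __u64 counter = 0;

  SEC("tracepoint/syscalls/sys_enter_getpid")
  int count_getpid(void *ctx)
  {
  	/* With -mcpu=v3, clang emits a BPF_ATOMIC | BPF_ADD | BPF_FETCH
  	 * instruction here, which is the class of operation these
  	 * verifier tests exercise.
  	 */
  	__u64 old = __sync_fetch_and_add(&counter, 1);

  	return old & 1;	/* use the fetched value so it is not optimized away */
  }

  char LICENSE[] SEC("license") = "GPL";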
b/tools/testing/selftests/bpf/verifier/atomic_fetch.c @@ -0,0 +1,57 @@ +#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \ + { \ + "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \ + .insns = { \ + /* u64 val = operan1; */ \ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, operand1), \ + /* u64 old = atomic_fetch_add(&val, operand2); */ \ + BPF_MOV64_REG(dst_reg, BPF_REG_10), \ + BPF_MOV64_IMM(src_reg, operand2), \ + BPF_ATOMIC_OP(BPF_DW, op, \ + dst_reg, src_reg, -8), \ + /* if (old != operand1) exit(1); */ \ + BPF_JMP_IMM(BPF_JEQ, src_reg, operand1, 2), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + /* if (val != result) exit (2); */ \ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), \ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, expect, 2), \ + BPF_MOV64_IMM(BPF_REG_0, 2), \ + BPF_EXIT_INSN(), \ + /* exit(0); */ \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_EXIT_INSN(), \ + }, \ + .result = ACCEPT, \ + } +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XCHG, 0x011, 0x011), +#undef __ATOMIC_FETCH_OP_TEST diff --git 
a/tools/testing/selftests/bpf/verifier/atomic_invalid.c b/tools/testing/selftests/bpf/verifier/atomic_invalid.c new file mode 100644 index 0000000000000..39272720b2f6f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c @@ -0,0 +1,25 @@ +#define __INVALID_ATOMIC_ACCESS_TEST(op) \ + { \ + "atomic " #op " access through non-pointer ", \ + .insns = { \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_MOV64_IMM(BPF_REG_1, 0), \ + BPF_ATOMIC_OP(BPF_DW, op, BPF_REG_1, BPF_REG_0, -8), \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_EXIT_INSN(), \ + }, \ + .result = REJECT, \ + .errstr = "R1 invalid mem access 'inv'" \ + } +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_AND), +__INVALID_ATOMIC_ACCESS_TEST(BPF_AND | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_OR), +__INVALID_ATOMIC_ACCESS_TEST(BPF_OR | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XCHG), +__INVALID_ATOMIC_ACCESS_TEST(BPF_CMPXCHG), From db5b6a46f43aba3473c8b6b1d11aff1fa2aa354b Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Mon, 18 Oct 2021 04:30:48 -0700 Subject: [PATCH 077/181] net: bpf: Switch over to memdup_user() This patch fixes the following Coccinelle warning: net/bpf/test_run.c:361:8-15: WARNING opportunity for memdup_user net/bpf/test_run.c:1055:8-15: WARNING opportunity for memdup_user Use memdup_user rather than duplicating its implementation This is a little bit restricted to reduce false positives Signed-off-by: Qing Wang Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/1634556651-38702-1-git-send-email-wangqing@vivo.com --- net/bpf/test_run.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 529608784aa89..46dd957559672 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -358,13 +358,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, return -EINVAL; if (ctx_size_in) { - info.ctx = kzalloc(ctx_size_in, GFP_USER); - if (!info.ctx) - return -ENOMEM; - if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) { - err = -EFAULT; - goto out; - } + info.ctx = memdup_user(ctx_in, ctx_size_in); + if (IS_ERR(info.ctx)) + return PTR_ERR(info.ctx); } else { info.ctx = NULL; } @@ -392,7 +388,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) err = -EFAULT; -out: kfree(info.ctx); return err; } @@ -1052,13 +1047,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, return -EINVAL; if (ctx_size_in) { - ctx = kzalloc(ctx_size_in, GFP_USER); - if (!ctx) - return -ENOMEM; - if (copy_from_user(ctx, ctx_in, ctx_size_in)) { - err = -EFAULT; - goto out; - } + ctx = memdup_user(ctx_in, ctx_size_in); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); } rcu_read_lock_trace(); From 44ce0ac11e4e6dd1aee2e41e7d74f95f3b961cfb Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Thu, 21 Oct 2021 20:39:13 +0800 Subject: [PATCH 078/181] samples: bpf: Suppress readelf stderr when probing for BTF support When compiling bpf samples, the following warning appears: readelf: Error: Missing knowledge of 32-bit reloc types used in DWARF sections of machine number 247 readelf: Warning: unable to apply unsupported reloc type 10 to section .debug_info readelf: Warning: unable to apply unsupported reloc type 1 
to section .debug_info readelf: Warning: unable to apply unsupported reloc type 10 to section .debug_info Same problem was mentioned in commit 2f0921262ba9 ("selftests/bpf: suppress readelf stderr when probing for BTF support"), let's use readelf that supports btf. Signed-off-by: Pu Lehui Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211021123913.48833-1-pulehui@huawei.com --- samples/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4c5ad15f8d288..c8106835f2099 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -229,6 +229,7 @@ CLANG ?= clang OPT ?= opt LLVM_DIS ?= llvm-dis LLVM_OBJCOPY ?= llvm-objcopy +LLVM_READELF ?= llvm-readelf BTF_PAHOLE ?= pahole # Detect that we're cross compiling and use the cross compiler @@ -252,7 +253,7 @@ BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ - readelf -S ./llvm_btf_verify.o | grep BTF; \ + $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) BPF_EXTRA_CFLAGS += -fno-stack-protector From 9eeb3aa33ae005526f672b394c1791578463513f Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 21 Oct 2021 21:47:51 +0800 Subject: [PATCH 079/181] bpf: Add bpf_skc_to_unix_sock() helper The helper is used in tracing programs to cast a socket pointer to a unix_sock pointer. The return value could be NULL if the casting is illegal. Suggested-by: Yonghong Song Signed-off-by: Hengqi Chen Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021134752.1223426-2-hengqi.chen@gmail.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 7 +++++++ kernel/trace/bpf_trace.c | 2 ++ net/core/filter.c | 23 +++++++++++++++++++++++ scripts/bpf_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 7 +++++++ 6 files changed, 42 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d604c8251d889..be3102b4554b6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2093,6 +2093,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937ad..22e7a3f38b9ff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4909,6 +4909,12 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5095,7 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6b3153841a33f..cbcd0d6fca7c7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1608,6 +1608,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_tcp_request_sock_proto; case BPF_FUNC_skc_to_udp6_sock: return &bpf_skc_to_udp6_sock_proto; + case BPF_FUNC_skc_to_unix_sock: + return &bpf_skc_to_unix_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: diff --git a/net/core/filter.c b/net/core/filter.c index 4bace37a6a449..8e8d3b49c2976 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10723,6 +10723,26 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], }; +BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk) +{ + /* unix_sock type is not generated in dwarf and hence btf, + * trigger an explicit type generation here. + */ + BTF_TYPE_EMIT(struct unix_sock); + if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { + .func = bpf_skc_to_unix_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], +}; + BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); @@ -10762,6 +10782,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_udp6_sock: func = &bpf_skc_to_udp6_sock_proto; break; + case BPF_FUNC_skc_to_unix_sock: + func = &bpf_skc_to_unix_sock_proto; + break; default: return bpf_base_func_proto(func_id); } diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 00ac7b79cddb4..a6403ddf5de72 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -537,6 +537,7 @@ def __init__(self, parser): 'struct tcp_timewait_sock', 'struct tcp_request_sock', 'struct udp6_sock', + 'struct unix_sock', 'struct task_struct', 'struct __sk_buff', @@ -589,6 +590,7 @@ def __init__(self, parser): 'struct tcp_timewait_sock', 'struct tcp_request_sock', 'struct udp6_sock', + 'struct unix_sock', 'struct task_struct', 'struct path', 'struct btf_ptr', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fc59d61937ad..22e7a3f38b9ff 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4909,6 +4909,12 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5095,7 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper From b6c4e71516099be676ebd897ea440ce2dddca6d1 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 21 Oct 2021 21:47:52 +0800 Subject: [PATCH 080/181] selftests/bpf: Test bpf_skc_to_unix_sock() helper Add a new test which triggers unix_listen kernel function to test bpf_skc_to_unix_sock helper. Signed-off-by: Hengqi Chen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211021134752.1223426-3-hengqi.chen@gmail.com --- .../bpf/prog_tests/skc_to_unix_sock.c | 54 +++++++++++++++++++ .../bpf/progs/test_skc_to_unix_sock.c | 40 ++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c create mode 100644 tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c new file mode 100644 index 0000000000000..3eefdfed1db93 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include +#include +#include "test_skc_to_unix_sock.skel.h" + +static const char *sock_path = "@skc_to_unix_sock"; + +void test_skc_to_unix_sock(void) +{ + struct test_skc_to_unix_sock *skel; + struct sockaddr_un sockaddr; + int err, sockfd = 0; + + skel = test_skc_to_unix_sock__open(); + if (!ASSERT_OK_PTR(skel, "could not open BPF object")) + return; + + skel->rodata->my_pid = getpid(); + + err = test_skc_to_unix_sock__load(skel); + if (!ASSERT_OK(err, "could not load BPF object")) + goto cleanup; + + err = test_skc_to_unix_sock__attach(skel); + if (!ASSERT_OK(err, "could not attach BPF object")) + goto cleanup; + + /* trigger unix_listen */ + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (!ASSERT_GT(sockfd, 0, "socket failed")) + goto cleanup; + + memset(&sockaddr, 0, sizeof(sockaddr)); + sockaddr.sun_family = AF_UNIX; + strncpy(sockaddr.sun_path, sock_path, strlen(sock_path)); + sockaddr.sun_path[0] = '\0'; + + err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)); + if (!ASSERT_OK(err, "bind failed")) + goto cleanup; + + err = listen(sockfd, 1); + if (!ASSERT_OK(err, "listen failed")) + goto cleanup; + + ASSERT_EQ(strcmp(skel->bss->path, sock_path), 0, "bpf_skc_to_unix_sock failed"); + +cleanup: + if (sockfd) + close(sockfd); + test_skc_to_unix_sock__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c new file mode 100644 index 0000000000000..a408ec95cba49 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include "vmlinux.h" +#include +#include +#include "bpf_tracing_net.h" + +const volatile pid_t my_pid = 0; +char path[256] = {}; + +SEC("fentry/unix_listen") +int BPF_PROG(unix_listen, struct socket *sock, int backlog) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + struct unix_sock *unix_sk; + int i, len; + + if (pid != my_pid) + return 0; + + unix_sk = (struct unix_sock *)bpf_skc_to_unix_sock(sock->sk); + if (!unix_sk) + return 0; + + if (!UNIX_ABSTRACT(unix_sk)) + return 0; + + len = 
unix_sk->addr->len - sizeof(short); + path[0] = '@'; + for (i = 1; i < len; i++) { + if (i >= sizeof(struct sockaddr_un)) + break; + + path[i] = unix_sk->addr->name->sun_path[i]; + } + return 0; +} + +char _license[] SEC("license") = "GPL"; From 632f96d2652e0d4188de837f88a977eedb55aee1 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 21 Oct 2021 12:46:58 +0200 Subject: [PATCH 081/181] libbpf: Fix ptr_is_aligned() usages Currently ptr_is_aligned() takes size, and not alignment, as a parameter, which may be overly pessimistic e.g. for __i128 on s390, which must be only 8-byte aligned. Fix by using btf__align_of(). Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211021104658.624944-2-iii@linux.ibm.com --- tools/lib/bpf/btf_dump.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e98c201f29b51..e9e5801ece4ca 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1657,9 +1657,15 @@ static int btf_dump_base_type_check_zero(struct btf_dump *d, return 0; } -static bool ptr_is_aligned(const void *data, int data_sz) +static bool ptr_is_aligned(const struct btf *btf, __u32 type_id, + const void *data) { - return ((uintptr_t)data) % data_sz == 0; + int alignment = btf__align_of(btf, type_id); + + if (alignment == 0) + return false; + + return ((uintptr_t)data) % alignment == 0; } static int btf_dump_int_data(struct btf_dump *d, @@ -1681,7 +1687,7 @@ static int btf_dump_int_data(struct btf_dump *d, /* handle packed int data - accesses of integers not aligned on * int boundaries can cause problems on some platforms. */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, type_id, data)) { memcpy(buf, data, sz); data = buf; } @@ -1770,7 +1776,7 @@ static int btf_dump_float_data(struct btf_dump *d, int sz = t->size; /* handle unaligned data; copy to local union */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, type_id, data)) { memcpy(&fl, data, sz); flp = &fl; } @@ -1933,7 +1939,7 @@ static int btf_dump_ptr_data(struct btf_dump *d, __u32 id, const void *data) { - if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { + if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) { btf_dump_type_values(d, "%p", *(void **)data); } else { union ptr_data pt; @@ -1953,10 +1959,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { - int sz = t->size; - /* handle unaligned enum value */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, id, data)) { __u64 val; int err; From aba64c7da98330141dcdadd5612f088043a83696 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Wed, 20 Oct 2021 00:48:17 -0700 Subject: [PATCH 082/181] bpf: Add verified_insns to bpf_prog_info and fdinfo This stat is currently printed in the verifier log and not stored anywhere. To ease consumption of this data, add a field to bpf_prog_aux so it can be exposed via BPF_OBJ_GET_INFO_BY_FD and fdinfo. 
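For illustration only (not part of the patch): a minimal sketch of how a tool could consume the new stat from the fdinfo side. The helper name read_verified_insns() is hypothetical; the only assumption taken from the patch is the "verified_insns:" key printed by bpf_prog_show_fdinfo() in the hunk below.

#include <stdio.h>

/* scan /proc/self/fdinfo/<prog_fd> for the "verified_insns:" line */
static int read_verified_insns(int prog_fd, unsigned int *insns)
{
	char path[64], line[256];
	FILE *f;
	int found = 0;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", prog_fd);
	f = fopen(path, "r");
	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "verified_insns:\t%u", insns) == 1) {
			found = 1;
			break;
		}
	}
	fclose(f);
	return found ? 0 : -1;
}

The BPF_OBJ_GET_INFO_BY_FD path is exercised directly by the selftest added in the next patch.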
Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211020074818.1017682-2-davemarchevsky@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 8 ++++++-- kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 1 + 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index be3102b4554b6..31421c74ba081 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -887,6 +887,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ + u32 verified_insns; struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 22e7a3f38b9ff..c108200378834 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5620,6 +5620,7 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; + __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e50c0bfdb7d3..5beb321b3b3b6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1848,7 +1848,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "prog_id:\t%u\n" "run_time_ns:\t%llu\n" "run_cnt:\t%llu\n" - "recursion_misses:\t%llu\n", + "recursion_misses:\t%llu\n" + "verified_insns:\t%u\n", prog->type, prog->jited, prog_tag, @@ -1856,7 +1857,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) prog->aux->id, stats.nsecs, stats.cnt, - stats.misses); + stats.misses, + prog->aux->verified_insns); } #endif @@ -3625,6 +3627,8 @@ static int bpf_prog_get_info_by_fd(struct file *file, info.run_cnt = stats.cnt; info.recursion_misses = stats.misses; + info.verified_insns = prog->aux->verified_insns; + if (!bpf_capable()) { info.jited_prog_len = 0; info.xlated_prog_len = 0; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 21cdff35a2f9e..c6616e3258038 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14033,6 +14033,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->verification_time = ktime_get_ns() - start_time; print_verification_stats(env); + env->prog->aux->verified_insns = env->insn_processed; if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 22e7a3f38b9ff..c108200378834 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5620,6 +5620,7 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; + __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { From e1b9023fc7ab0dcfeefe9f0a14772ad7a1c37b5b Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Wed, 20 Oct 2021 00:48:18 -0700 Subject: [PATCH 083/181] selftests/bpf: Add verif_stats test verified_insns field was added to response of bpf_obj_get_info_by_fd call on a prog. Confirm that it's being populated by loading a simple program and asking for its info. 
Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211020074818.1017682-3-davemarchevsky@fb.com --- .../selftests/bpf/prog_tests/verif_stats.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/verif_stats.c diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c new file mode 100644 index 0000000000000..b4bae1340cf1a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include + +#include "trace_vprintk.lskel.h" + +void test_verif_stats(void) +{ + __u32 len = sizeof(struct bpf_prog_info); + struct bpf_prog_info info = {}; + struct trace_vprintk *skel; + int err; + + skel = trace_vprintk__open_and_load(); + if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) + goto cleanup; + + err = bpf_obj_get_info_by_fd(skel->progs.sys_enter.prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto cleanup; + + if (!ASSERT_GT(info.verified_insns, 0, "verified_insns")) + goto cleanup; + +cleanup: + trace_vprintk__destroy(skel); +} From d4121376ac7a9c81a696d7558789b2f29ef3574e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Oct 2021 13:41:30 +0200 Subject: [PATCH 084/181] selftests/bpf: Fix perf_buffer test on system with offline cpus The perf_buffer fails on system with offline cpus: # test_progs -t perf_buffer test_perf_buffer:PASS:nr_cpus 0 nsec test_perf_buffer:PASS:nr_on_cpus 0 nsec test_perf_buffer:PASS:skel_load 0 nsec test_perf_buffer:PASS:attach_kprobe 0 nsec test_perf_buffer:PASS:perf_buf__new 0 nsec test_perf_buffer:PASS:epoll_fd 0 nsec skipping offline CPU #24 skipping offline CPU #25 skipping offline CPU #26 skipping offline CPU #27 skipping offline CPU #28 skipping offline CPU #29 skipping offline CPU #30 skipping offline CPU #31 test_perf_buffer:PASS:perf_buffer__poll 0 nsec test_perf_buffer:PASS:seen_cpu_cnt 0 nsec test_perf_buffer:FAIL:buf_cnt got 24, expected 32 Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED Changing the test to check online cpus instead of possible. 
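A rough sketch of the distinction the fix relies on (editorial illustration, not code from the patch; count_online_cpus() is a hypothetical helper): the possible-CPU count is what libbpf_num_possible_cpus() reports, while the online count has to be derived separately, for example from the "0-3,5,7-9" style range list in /sys/devices/system/cpu/online.

#include <stdio.h>

/* count CPUs listed in the sysfs online range list */
static int count_online_cpus(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/online", "r");
	int cnt = 0, lo, hi;

	if (!f)
		return -1;
	while (fscanf(f, "%d", &lo) == 1) {
		hi = lo;
		fscanf(f, "-%d", &hi);	/* optional end of a "lo-hi" range */
		cnt += hi - lo + 1;
		if (fgetc(f) != ',')	/* ',' separates ranges; anything else ends the list */
			break;
	}
	fclose(f);
	return cnt;
}

With offline CPUs present, this online count is what the buffer count should be compared against, not the possible count.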
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211021114132.8196-2-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 6979aff4aab22..877600392851a 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -107,8 +107,8 @@ void serial_test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; - if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", - "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus)) goto out_close; for (i = 0; i < nr_cpus; i++) { From aa274f98b269b2babf37b6308f8a1a009ca4fc41 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Oct 2021 13:41:31 +0200 Subject: [PATCH 085/181] selftests/bpf: Fix possible/online index mismatch in perf_buffer test The perf_buffer fails on system with offline cpus: # test_progs -t perf_buffer serial_test_perf_buffer:PASS:nr_cpus 0 nsec serial_test_perf_buffer:PASS:nr_on_cpus 0 nsec serial_test_perf_buffer:PASS:skel_load 0 nsec serial_test_perf_buffer:PASS:attach_kprobe 0 nsec serial_test_perf_buffer:PASS:perf_buf__new 0 nsec serial_test_perf_buffer:PASS:epoll_fd 0 nsec skipping offline CPU #4 serial_test_perf_buffer:PASS:perf_buffer__poll 0 nsec serial_test_perf_buffer:PASS:seen_cpu_cnt 0 nsec serial_test_perf_buffer:PASS:buf_cnt 0 nsec ... serial_test_perf_buffer:PASS:fd_check 0 nsec serial_test_perf_buffer:PASS:drain_buf 0 nsec serial_test_perf_buffer:PASS:consume_buf 0 nsec serial_test_perf_buffer:FAIL:cpu_seen cpu 5 not seen #88 perf_buffer:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED If the offline cpu is from the middle of the possible set, we get mismatch with possible and online cpu buffers. The perf buffer test calls perf_buffer__consume_buffer for all 'possible' cpus, but the library holds only 'online' cpu buffers and perf_buffer__consume_buffer returns them based on index. Adding extra (online) index to keep track of online buffers, we need the original (possible) index to trigger trace on proper cpu. 
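The core of the fix is the two-index walk. A simplified, self-contained sketch of the pattern follows (consume_all() is a hypothetical wrapper; perf_buffer__consume_buffer() is the real libbpf API the test uses): CPU ids are sparse over the possible set, while buffer indices are dense over online CPUs only.

#include <stdbool.h>
#include <bpf/libbpf.h>

/* 'online' marks which possible CPUs are online, as in the selftest */
static int consume_all(struct perf_buffer *pb, const bool *online,
		       int nr_possible, int on_len)
{
	int i, j = 0, err;

	for (i = 0; i < nr_possible; i++) {
		if (i >= on_len || !online[i])
			continue;	/* offline CPU: libbpf allocated no buffer for it */

		/* buffers are indexed densely by 'j', not by CPU id 'i' */
		err = perf_buffer__consume_buffer(pb, j);
		if (err)
			return err;
		j++;
	}
	return 0;
}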
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211021114132.8196-3-jolsa@kernel.org --- .../testing/selftests/bpf/prog_tests/perf_buffer.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 877600392851a..0b0cd045979b5 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -45,7 +45,7 @@ int trigger_on_cpu(int cpu) void serial_test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i; + int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; cpu_set_t cpu_seen; @@ -111,15 +111,15 @@ void serial_test_perf_buffer(void) "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus)) goto out_close; - for (i = 0; i < nr_cpus; i++) { + for (i = 0, j = 0; i < nr_cpus; i++) { if (i >= on_len || !online[i]) continue; - fd = perf_buffer__buffer_fd(pb, i); + fd = perf_buffer__buffer_fd(pb, j); CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); last_fd = fd; - err = perf_buffer__consume_buffer(pb, i); + err = perf_buffer__consume_buffer(pb, j); if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) goto out_close; @@ -127,12 +127,13 @@ void serial_test_perf_buffer(void) if (trigger_on_cpu(i)) goto out_close; - err = perf_buffer__consume_buffer(pb, i); - if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + err = perf_buffer__consume_buffer(pb, j); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err)) goto out_close; if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) goto out_close; + j++; } out_free_pb: From 99d099757ab487e0d317c69541b47aaae0b6c431 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Oct 2021 13:41:32 +0200 Subject: [PATCH 086/181] selftests/bpf: Use nanosleep tracepoint in perf buffer test The perf buffer tests triggers trace with nanosleep syscall, but monitors all syscalls, which results in lot of data in the buffer and makes it harder to debug. Let's lower the trace traffic and monitor just nanosleep syscall. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211021114132.8196-4-jolsa@kernel.org --- tools/testing/selftests/bpf/progs/test_perf_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index d37ce29fd393f..a08874c5bdf2f 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -12,7 +12,7 @@ struct { __type(value, int); } perf_buf_map SEC(".maps"); -SEC("tp/raw_syscalls/sys_enter") +SEC("tp/syscalls/sys_enter_nanosleep") int handle_sys_enter(void *ctx) { int cpu = bpf_get_smp_processor_id(); From b96c07f3b5ae6944eb52fd96a322340aa80aef5d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:43:55 -0700 Subject: [PATCH 087/181] libbpf: Deprecate btf__finalize_data() and move it into libbpf.c There isn't a good use case where anyone but libbpf itself needs to call btf__finalize_data(). It was implemented for internal use and it's not clear why it was made into public API in the first place. 
To function, it requires active ELF data, which is stored inside bpf_object for the duration of opening phase only. But the only BTF that needs bpf_object's ELF is that bpf_object's BTF itself, which libbpf fixes up automatically during bpf_object__open() operation anyways. There is no need for any additional fix up and no reasonable scenario where it's useful and appropriate. Thus, btf__finalize_data() is just an API atavism and is better removed. So this patch marks it as deprecated immediately (v0.6+) and moves the code from btf.c into libbpf.c where it's used in the context of bpf_object opening phase. Such code co-location allows to make code structure more straightforward and remove bpf_object__section_size() and bpf_object__variable_offset() internal helpers from libbpf_internal.h, making them static. Their naming is also adjusted to not create a wrong illusion that they are some sort of method of bpf_object. They are internal helpers and are called appropriately. This is part of libbpf 1.0 effort ([0]). [0] Closes: https://github.com/libbpf/libbpf/issues/276 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 93 ---------------------------- tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.c | 106 ++++++++++++++++++++++++++++++-- tools/lib/bpf/libbpf_internal.h | 4 -- 4 files changed, 102 insertions(+), 102 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1f6dea11f6004..3a01c4b7f36ab 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1107,99 +1107,6 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse(path, base_btf, NULL)); } -static int compare_vsi_off(const void *_a, const void *_b) -{ - const struct btf_var_secinfo *a = _a; - const struct btf_var_secinfo *b = _b; - - return a->offset - b->offset; -} - -static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, - struct btf_type *t) -{ - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; - struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; - - if (!name) { - pr_debug("No name found in string section for DATASEC kind.\n"); - return -ENOENT; - } - - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables - */ - if (t->size) - goto sort_vars; - - ret = bpf_object__section_size(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } - - t->size = size; - - for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { - t_var = btf__type_by_id(btf, vsi->type); - var = btf_var(t_var); - - if (!btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); - return -EINVAL; - } - - if (var->linkage == BTF_VAR_STATIC) - continue; - - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); - return -ENOENT; - } - - ret = bpf_object__variable_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); - return -ENOENT; - } - - vsi->offset = off; - } - -sort_vars: - qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); - return 0; -} - -int btf__finalize_data(struct bpf_object *obj, struct 
btf *btf) -{ - int err = 0; - __u32 i; - - for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = btf_type_by_id(btf, i); - - /* Loader needs to fix up some of the things compiler - * couldn't get its hands on while emitting BTF. This - * is section size and global variable offset. We use - * the info from the ELF itself for this purpose. - */ - if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); - if (err) - break; - } - } - - return libbpf_err(err); -} - static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); int btf__load_into_kernel(struct btf *btf) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4011e206e6f7f..c9364be420352 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -123,6 +123,7 @@ LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *b LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only") LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") LIBBPF_API int btf__load(struct btf *btf); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 760c7e3466038..fdc25a1121502 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1324,8 +1324,7 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size) +static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) { int ret = -ENOENT; @@ -1357,8 +1356,7 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, return *size ? 
0 : ret; } -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off) +static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) { Elf_Data *symbols = obj->efile.symbols; const char *sname; @@ -2650,6 +2648,104 @@ static int bpf_object__init_btf(struct bpf_object *obj, return 0; } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = btf_vlen(t); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + const struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + + ret = find_elf_sec_sz(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = btf_var(t_var); + + if (!btf_is_var(t_var)) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = find_elf_var_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", + name); + return -ENOENT; + } + + vsi->offset = off; + } + +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i, n = btf__get_nr_types(btf); + + for (i = 1; i <= n; i++) { + struct btf_type *t = btf_type_by_id(btf, i); + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. 
+ */ + if (btf_is_datasec(t)) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return libbpf_err(err); +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + return btf_finalize_data(obj, btf); +} + static int bpf_object__finalize_btf(struct bpf_object *obj) { int err; @@ -2657,7 +2753,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) if (!obj->btf) return 0; - err = btf__finalize_data(obj, obj->btf); + err = btf_finalize_data(obj, obj->btf); if (err) { pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); return err; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index f6a5748dd3183..4bbd327a4ce97 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -303,10 +303,6 @@ struct bpf_prog_load_params { int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, const char **prefix, int *kind); From 29a30ff501518a49282754909543cef1ef49e4bc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:43:56 -0700 Subject: [PATCH 088/181] libbpf: Extract ELF processing state into separate struct Name currently anonymous internal struct that keeps ELF-related state for bpf_object. Just a bit of clean up, no functional changes. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-3-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 70 ++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fdc25a1121502..84a0683d214bb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -462,6 +462,35 @@ struct module_btf { int fd_array_idx; }; +struct elf_state { + int fd; + const void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + GElf_Ehdr ehdr; + Elf_Data *symbols; + Elf_Data *data; + Elf_Data *rodata; + Elf_Data *bss; + Elf_Data *st_ops_data; + size_t shstrndx; /* section index for section name strings */ + size_t strtabidx; + struct { + GElf_Shdr shdr; + Elf_Data *data; + } *reloc_sects; + int nr_reloc_sects; + int maps_shndx; + int btf_maps_shndx; + __u32 btf_maps_sec_btf_id; + int text_shndx; + int symbols_shndx; + int data_shndx; + int rodata_shndx; + int bss_shndx; + int st_ops_shndx; +}; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; @@ -484,40 +513,10 @@ struct bpf_object { struct bpf_gen *gen_loader; + /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ + struct elf_state efile; /* - * Information when doing elf related work. Only valid if fd - * is valid. 
- */ - struct { - int fd; - const void *obj_buf; - size_t obj_buf_sz; - Elf *elf; - GElf_Ehdr ehdr; - Elf_Data *symbols; - Elf_Data *data; - Elf_Data *rodata; - Elf_Data *bss; - Elf_Data *st_ops_data; - size_t shstrndx; /* section index for section name strings */ - size_t strtabidx; - struct { - GElf_Shdr shdr; - Elf_Data *data; - } *reloc_sects; - int nr_reloc_sects; - int maps_shndx; - int btf_maps_shndx; - __u32 btf_maps_sec_btf_id; - int text_shndx; - int symbols_shndx; - int data_shndx; - int rodata_shndx; - int bss_shndx; - int st_ops_shndx; - } efile; - /* - * All loaded bpf_object is linked in a list, which is + * All loaded bpf_object are linked in a list, which is * hidden to caller. bpf_objects__ handlers deal with * all objects. */ @@ -551,7 +550,6 @@ struct bpf_object { char path[]; }; -#define obj_elf_valid(o) ((o)->efile.elf) static const char *elf_sym_str(const struct bpf_object *obj, size_t off); static const char *elf_sec_str(const struct bpf_object *obj, size_t off); @@ -1185,7 +1183,7 @@ static struct bpf_object *bpf_object__new(const char *path, static void bpf_object__elf_finish(struct bpf_object *obj) { - if (!obj_elf_valid(obj)) + if (!obj->efile.elf) return; if (obj->efile.elf) { @@ -1210,7 +1208,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) int err = 0; GElf_Ehdr *ep; - if (obj_elf_valid(obj)) { + if (obj->efile.elf) { pr_warn("elf: init internal error\n"); return -LIBBPF_ERRNO__LIBELF; } From ad23b7238474c6319bf692ae6ce037d9696df1d1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:43:57 -0700 Subject: [PATCH 089/181] libbpf: Use Elf64-specific types explicitly for dealing with ELF Minimize the usage of class-agnostic gelf_xxx() APIs from libelf. These APIs require copying ELF data structures into local GElf_xxx structs and have a more cumbersome API. BPF ELF file is defined to be always 64-bit ELF object, even when intended to be run on 32-bit host architectures, so there is no need to do class-agnostic conversions everywhere. BPF static linker implementation within libbpf has been using Elf64-specific types since initial implementation. Add two simple helpers, elf_sym_by_idx() and elf_rel_by_idx(), for more succinct direct access to ELF symbol and relocation records within ELF data itself and switch all the GElf_xxx usage into Elf64_xxx equivalents. The only remaining place within libbpf.c that's still using gelf API is gelf_getclass(), as there doesn't seem to be a direct way to get underlying ELF bitness. No functional changes intended. 
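A side-by-side sketch of the two access styles (editorial illustration, not part of the patch; sym_value_gelf() and sym_value_elf64() are hypothetical names, while gelf_getsym() and the Elf64_Sym layout are standard libelf/ELF):

#include <gelf.h>

/* class-agnostic libelf API: copies the record into a local GElf_Sym */
static int sym_value_gelf(Elf_Data *symbols, int idx, unsigned long long *val)
{
	GElf_Sym sym;

	if (!gelf_getsym(symbols, idx, &sym))
		return -1;
	*val = sym.st_value;
	return 0;
}

/* Elf64-specific access: index straight into the section data, no copy */
static int sym_value_elf64(Elf_Data *symbols, size_t idx, unsigned long long *val)
{
	Elf64_Sym *sym;

	if (idx >= symbols->d_size / sizeof(*sym))
		return -1;
	sym = (Elf64_Sym *)symbols->d_buf + idx;
	*val = sym->st_value;
	return 0;
}

The direct form is what the new elf_sym_by_idx()/elf_rel_by_idx() helpers in the diff below encapsulate.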
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-4-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 353 ++++++++++++++++++-------------- tools/lib/bpf/libbpf_internal.h | 4 +- tools/lib/bpf/linker.c | 1 - 3 files changed, 196 insertions(+), 162 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 84a0683d214bb..74f7d9252d132 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -467,7 +467,7 @@ struct elf_state { const void *obj_buf; size_t obj_buf_sz; Elf *elf; - GElf_Ehdr ehdr; + Elf64_Ehdr *ehdr; Elf_Data *symbols; Elf_Data *data; Elf_Data *rodata; @@ -476,7 +476,7 @@ struct elf_state { size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct { - GElf_Shdr shdr; + Elf64_Shdr *shdr; Elf_Data *data; } *reloc_sects; int nr_reloc_sects; @@ -555,9 +555,11 @@ static const char *elf_sym_str(const struct bpf_object *obj, size_t off); static const char *elf_sec_str(const struct bpf_object *obj, size_t off); static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); -static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); +static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); +static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); +static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { @@ -699,25 +701,25 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; int nr_progs, err, i; const char *name; - GElf_Sym sym; + Elf64_Sym *sym; progs = obj->programs; nr_progs = obj->nr_programs; - nr_syms = symbols->d_size / sizeof(GElf_Sym); + nr_syms = symbols->d_size / sizeof(Elf64_Sym); sec_off = 0; for (i = 0; i < nr_syms; i++) { - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != sec_idx) + sym = elf_sym_by_idx(obj, i); + + if (sym->st_shndx != sec_idx) continue; - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; - prog_sz = sym.st_size; - sec_off = sym.st_value; + prog_sz = sym->st_size; + sec_off = sym->st_value; - name = elf_sym_str(obj, sym.st_name); + name = elf_sym_str(obj, sym->st_name); if (!name) { pr_warn("sec '%s': failed to get symbol name for offset %zu\n", sec_name, sec_off); @@ -730,7 +732,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); return -ENOTSUP; } @@ -763,9 +765,9 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (GELF_ST_BIND(sym.st_info) != STB_LOCAL - && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN - || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL)) + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL + && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + 
|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -1205,8 +1207,9 @@ static void bpf_object__elf_finish(struct bpf_object *obj) static int bpf_object__elf_init(struct bpf_object *obj) { + Elf64_Ehdr *ehdr; int err = 0; - GElf_Ehdr *ep; + Elf *elf; if (obj->efile.elf) { pr_warn("elf: init internal error\n"); @@ -1218,8 +1221,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) * obj_buf should have been validated by * bpf_object__open_buffer(). */ - obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, - obj->efile.obj_buf_sz); + elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { @@ -1231,23 +1233,37 @@ static int bpf_object__elf_init(struct bpf_object *obj) return err; } - obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); + elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); } - if (!obj->efile.elf) { + if (!elf) { pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__LIBELF; goto errout; } - if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { + obj->efile.elf = elf; + + if (elf_kind(elf) != ELF_K_ELF) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); + goto errout; + } + + if (gelf_getclass(elf) != ELFCLASS64) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); + goto errout; + } + + obj->efile.ehdr = ehdr = elf64_getehdr(elf); + if (!obj->efile.ehdr) { pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; goto errout; } - ep = &obj->efile.ehdr; - if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1255,7 +1271,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ - if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { + if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1263,8 +1279,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Old LLVM set e_machine to EM_NONE */ - if (ep->e_type != ET_REL || - (ep->e_machine && ep->e_machine != EM_BPF)) { + if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; @@ -1279,10 +1294,10 @@ static int bpf_object__elf_init(struct bpf_object *obj) static int bpf_object__check_endianness(struct bpf_object *obj) { #if __BYTE_ORDER == __LITTLE_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) return 0; #elif __BYTE_ORDER == __BIG_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) + if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) return 0; #else # error "Unrecognized __BYTE_ORDER__" @@ -1363,23 +1378,20 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ if (!name || !off) return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { - GElf_Sym sym; + for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { + Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (!gelf_getsym(symbols, si, &sym)) - continue; - if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || - GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || + ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) continue; - sname = elf_sym_str(obj, sym.st_name); + sname = elf_sym_str(obj, sym->st_name); if (!sname) { - pr_warn("failed to get sym name string for var %s\n", - name); + pr_warn("failed to get sym name string for var %s\n", name); return -EIO; } if (strcmp(name, sname) == 0) { - *off = sym.st_value; + *off = sym->st_value; return 0; } } @@ -1866,15 +1878,13 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) * * TODO: Detect array of map and report error. */ - nr_syms = symbols->d_size / sizeof(GElf_Sym); + nr_syms = symbols->d_size / sizeof(Elf64_Sym); for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); - if (!gelf_getsym(symbols, i, &sym)) + if (sym->st_shndx != obj->efile.maps_shndx) continue; - if (sym.st_shndx != obj->efile.maps_shndx) - continue; - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) continue; nr_maps++; } @@ -1891,40 +1901,38 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) /* Fill obj->maps using data in "maps" section. 
*/ for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); const char *map_name; struct bpf_map_def *def; struct bpf_map *map; - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != obj->efile.maps_shndx) + if (sym->st_shndx != obj->efile.maps_shndx) continue; - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) continue; map = bpf_object__add_map(obj); if (IS_ERR(map)) return PTR_ERR(map); - map_name = elf_sym_str(obj, sym.st_name); + map_name = elf_sym_str(obj, sym->st_name); if (!map_name) { pr_warn("failed to get map #%d name sym string for obj %s\n", i, obj->path); return -LIBBPF_ERRNO__FORMAT; } - if (GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; } map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym.st_shndx; - map->sec_offset = sym.st_value; + map->sec_idx = sym->st_shndx; + map->sec_offset = sym->st_value; pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - if (sym.st_value + map_def_sz > data->d_size) { + if (sym->st_value + map_def_sz > data->d_size) { pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", obj->path, map_name); return -EINVAL; @@ -1936,7 +1944,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym.st_value); + def = (struct bpf_map_def *)(data->d_buf + sym->st_value); /* * If the definition of the map in the object file fits in * bpf_map_def, copy it. Any extra fields in our version @@ -2506,12 +2514,13 @@ static int bpf_object__init_maps(struct bpf_object *obj, static bool section_have_execinstr(struct bpf_object *obj, int idx) { - GElf_Shdr sh; + Elf64_Shdr *sh; - if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh)) + sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); + if (!sh) return false; - return sh.sh_flags & SHF_EXECINSTR; + return sh->sh_flags & SHF_EXECINSTR; } static bool btf_needs_sanitization(struct bpf_object *obj) @@ -2987,32 +2996,36 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) return NULL; } -static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr) +static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) { + Elf64_Shdr *shdr; + if (!scn) - return -EINVAL; + return NULL; - if (gelf_getshdr(scn, hdr) != hdr) { + shdr = elf64_getshdr(scn); + if (!shdr) { pr_warn("elf: failed to get section(%zu) header from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); - return -EINVAL; + return NULL; } - return 0; + return shdr; } static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) { const char *name; - GElf_Shdr sh; + Elf64_Shdr *sh; if (!scn) return NULL; - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return NULL; - name = elf_sec_str(obj, sh.sh_name); + name = elf_sec_str(obj, sh->sh_name); if (!name) { pr_warn("elf: failed to get section(%zu) name from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); @@ -3040,13 +3053,29 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } +static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) +{ + if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) + return NULL; + + return 
(Elf64_Sym *)obj->efile.symbols->d_buf + idx; +} + +static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) +{ + if (idx >= data->d_size / sizeof(Elf64_Rel)) + return NULL; + + return (Elf64_Rel *)data->d_buf + idx; +} + static bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ return str_has_pfx(name, ".debug_"); } -static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) +static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name) { /* no special handling of .strtab */ if (hdr->sh_type == SHT_STRTAB) @@ -3101,17 +3130,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj) const char *name; Elf_Data *data; Elf_Scn *scn; - GElf_Shdr sh; + Elf64_Shdr *sh; /* a bunch of ELF parsing functionality depends on processing symbols, * so do the first pass and find the symbol table */ scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; - if (sh.sh_type == SHT_SYMTAB) { + if (sh->sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warn("elf: multiple symbol tables in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; @@ -3123,7 +3153,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.symbols = data; obj->efile.symbols_shndx = elf_ndxscn(scn); - obj->efile.strtabidx = sh.sh_link; + obj->efile.strtabidx = sh->sh_link; } } @@ -3137,14 +3167,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) while ((scn = elf_nextscn(elf, scn)) != NULL) { idx++; - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; - name = elf_sec_str(obj, sh.sh_name); + name = elf_sec_str(obj, sh->sh_name); if (!name) return -LIBBPF_ERRNO__FORMAT; - if (ignore_elf_section(&sh, name)) + if (ignore_elf_section(sh, name)) continue; data = elf_sec_data(obj, scn); @@ -3153,8 +3184,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", idx, name, (unsigned long)data->d_size, - (int)sh.sh_link, (unsigned long)sh.sh_flags, - (int)sh.sh_type); + (int)sh->sh_link, (unsigned long)sh->sh_flags, + (int)sh->sh_type); if (strcmp(name, "license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); @@ -3172,10 +3203,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; - } else if (sh.sh_type == SHT_SYMTAB) { + } else if (sh->sh_type == SHT_SYMTAB) { /* already processed during the first pass above */ - } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { - if (sh.sh_flags & SHF_EXECINSTR) { + } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh->sh_flags & SHF_EXECINSTR) { if (strcmp(name, ".text") == 0) obj->efile.text_shndx = idx; err = bpf_object__add_programs(obj, data, name, idx); @@ -3194,10 +3225,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); } - } else if (sh.sh_type == SHT_REL) { + } else if (sh->sh_type == SHT_REL) { int nr_sects = obj->efile.nr_reloc_sects; void *sects = obj->efile.reloc_sects; - int sec = sh.sh_info; /* points to other section */ + int sec = sh->sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, sec) && @@ -3219,12 +3250,12 @@ static int bpf_object__elf_collect(struct 
bpf_object *obj) obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, - (size_t)sh.sh_size); + (size_t)sh->sh_size); } } @@ -3240,19 +3271,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return bpf_object__init_btf(obj, btf_data, btf_ext_data); } -static bool sym_is_extern(const GElf_Sym *sym) +static bool sym_is_extern(const Elf64_Sym *sym) { - int bind = GELF_ST_BIND(sym->st_info); + int bind = ELF64_ST_BIND(sym->st_info); /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ return sym->st_shndx == SHN_UNDEF && (bind == STB_GLOBAL || bind == STB_WEAK) && - GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; + ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; } -static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx) +static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) { - int bind = GELF_ST_BIND(sym->st_info); - int type = GELF_ST_TYPE(sym->st_info); + int bind = ELF64_ST_BIND(sym->st_info); + int type = ELF64_ST_TYPE(sym->st_info); /* in .text section */ if (sym->st_shndx != text_shndx) @@ -3450,30 +3481,31 @@ static int bpf_object__collect_externs(struct bpf_object *obj) int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; Elf_Scn *scn; - GElf_Shdr sh; + Elf64_Shdr *sh; if (!obj->efile.symbols) return 0; scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; dummy_var_btf_id = add_dummy_ksym_var(obj->btf); if (dummy_var_btf_id < 0) return dummy_var_btf_id; - n = sh.sh_size / sh.sh_entsize; + n = sh->sh_size / sh->sh_entsize; pr_debug("looking for externs among %d symbols...\n", n); for (i = 0; i < n; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); - if (!gelf_getsym(obj->efile.symbols, i, &sym)) + if (!sym) return -LIBBPF_ERRNO__FORMAT; - if (!sym_is_extern(&sym)) + if (!sym_is_extern(sym)) continue; - ext_name = elf_sym_str(obj, sym.st_name); + ext_name = elf_sym_str(obj, sym->st_name); if (!ext_name || !ext_name[0]) continue; @@ -3495,7 +3527,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) t = btf__type_by_id(obj->btf, ext->btf_id); ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; - ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { @@ -3730,7 +3762,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, - const GElf_Sym *sym, const GElf_Rel *rel) + const Elf64_Sym *sym, const Elf64_Rel *rel) { struct bpf_insn *insn = &prog->insns[insn_idx]; size_t map_idx, nr_maps = prog->obj->nr_maps; @@ -3747,7 +3779,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (sym_is_extern(sym)) { - int sym_idx = GELF_R_SYM(rel->r_info); + int sym_idx = ELF64_R_SYM(rel->r_info); int i, n = obj->nr_extern; struct extern_desc *ext; @@ -3912,9 +3944,8 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, } static int 
-bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data) +bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { - Elf_Data *symbols = obj->efile.symbols; const char *relo_sec_name, *sec_name; size_t sec_idx = shdr->sh_info; struct bpf_program *prog; @@ -3924,8 +3955,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data __u32 insn_idx; Elf_Scn *scn; Elf_Data *scn_data; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); @@ -3940,33 +3971,36 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", - relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); + relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) { + if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", - relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); + relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel.r_offset / BPF_INSN_SZ; + insn_idx = rel->r_offset / BPF_INSN_SZ; /* relocations against static functions are recorded as * relocations against the section that contains a function; * in such case, symbol will be STT_SECTION and sym.st_name * will point to empty string (0), so fetch section name * instead */ - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0) - sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx)); + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) + sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); else - sym_name = elf_sym_str(obj, sym.st_name); + sym_name = elf_sym_str(obj, sym->st_name); sym_name = sym_name ?: "sec_insn_off; err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], - insn_idx, sym_name, &sym, &rel); + insn_idx, sym_name, sym, rel); if (err) return err; @@ -6036,10 +6070,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) } static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data); + Elf64_Shdr *shdr, Elf_Data *data); static int bpf_object__collect_map_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data) + Elf64_Shdr *shdr, Elf_Data *data) { const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); int i, j, nrels, new_sz; @@ -6048,10 +6082,9 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; const char *name, *mname; - Elf_Data *symbols; unsigned int moff; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; void *tmp; if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) @@ -6060,28 +6093,30 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, if (!sec) return -EINVAL; - symbols = obj->efile.symbols; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; 
i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn(".maps relo #%d: failed to get ELF relo\n", i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn(".maps relo #%d: symbol %zx not found\n", - i, (size_t)GELF_R_SYM(rel.r_info)); + i, (size_t)ELF64_R_SYM(rel->r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym.st_name) ?: ""; - if (sym.st_shndx != obj->efile.btf_maps_shndx) { + name = elf_sym_str(obj, sym->st_name) ?: ""; + if (sym->st_shndx != obj->efile.btf_maps_shndx) { pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", i, name); return -LIBBPF_ERRNO__RELOC; } - pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n", - i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value, - (size_t)rel.r_offset, sym.st_name, name); + pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", + i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, + (size_t)rel->r_offset, sym->st_name, name); for (j = 0; j < obj->nr_maps; j++) { map = &obj->maps[j]; @@ -6089,13 +6124,13 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, continue; vi = btf_var_secinfos(sec) + map->btf_var_idx; - if (vi->offset <= rel.r_offset && - rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size) + if (vi->offset <= rel->r_offset && + rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) break; } if (j == obj->nr_maps) { - pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n", - i, name, (size_t)rel.r_offset); + pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", + i, name, (size_t)rel->r_offset); return -EINVAL; } @@ -6122,10 +6157,10 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; - if (rel.r_offset - vi->offset < moff) + if (rel->r_offset - vi->offset < moff) return -EINVAL; - moff = rel.r_offset - vi->offset - moff; + moff = rel->r_offset - vi->offset - moff; /* here we use BPF pointer size, which is always 64 bit, as we * are parsing ELF that was built for BPF target */ @@ -6171,7 +6206,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj) int i, err; for (i = 0; i < obj->efile.nr_reloc_sects; i++) { - GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; + Elf64_Shdr *shdr = obj->efile.reloc_sects[i].shdr; Elf_Data *data = obj->efile.reloc_sects[i].data; int idx = shdr->sh_info; @@ -8362,7 +8397,7 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, /* Collect the reloc from ELF and populate the st_ops->progs[] */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data) + Elf64_Shdr *shdr, Elf_Data *data) { const struct btf_member *member; struct bpf_struct_ops *st_ops; @@ -8370,58 +8405,58 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, unsigned int shdr_idx; const struct btf *btf; struct bpf_map *map; - Elf_Data *symbols; unsigned int moff, insn_idx; const char *name; __u32 member_idx; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; int i, nrels; - symbols = obj->efile.symbols; btf = obj->btf; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn("struct_ops reloc: failed to get %d reloc\n", i); return 
-LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn("struct_ops reloc: symbol %zx not found\n", - (size_t)GELF_R_SYM(rel.r_info)); + (size_t)ELF64_R_SYM(rel->r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym.st_name) ?: ""; - map = find_struct_ops_map_by_offset(obj, rel.r_offset); + name = elf_sym_str(obj, sym->st_name) ?: ""; + map = find_struct_ops_map_by_offset(obj, rel->r_offset); if (!map) { - pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", - (size_t)rel.r_offset); + pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", + (size_t)rel->r_offset); return -EINVAL; } - moff = rel.r_offset - map->sec_offset; - shdr_idx = sym.st_shndx; + moff = rel->r_offset - map->sec_offset; + shdr_idx = sym->st_shndx; st_ops = map->st_ops; - pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", map->name, - (long long)(rel.r_info >> 32), - (long long)sym.st_value, - shdr_idx, (size_t)rel.r_offset, - map->sec_offset, sym.st_name, name); + (long long)(rel->r_info >> 32), + (long long)sym->st_value, + shdr_idx, (size_t)rel->r_offset, + map->sec_offset, sym->st_name, name); if (shdr_idx >= SHN_LORESERVE) { - pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", - map->name, (size_t)rel.r_offset, shdr_idx); + pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel->r_offset, shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (sym.st_value % BPF_INSN_SZ) { + if (sym->st_value % BPF_INSN_SZ) { pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", - map->name, (unsigned long long)sym.st_value); + map->name, (unsigned long long)sym->st_value); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = sym.st_value / BPF_INSN_SZ; + insn_idx = sym->st_value / BPF_INSN_SZ; member = find_member_by_offset(st_ops->type, moff * 8); if (!member) { diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4bbd327a4ce97..13bc7950e3040 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -52,8 +52,8 @@ #endif /* Older libelf all end up in this expression, for both 32 and 64 bit */ -#ifndef GELF_ST_VISIBILITY -#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#ifndef ELF64_ST_VISIBILITY +#define ELF64_ST_VISIBILITY(o) ((o) & 0x03) #endif #define BTF_INFO_ENC(kind, kind_flag, vlen) \ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 2df880cefdaee..13e6fdc7d8cb2 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "libbpf.h" #include "btf.h" From 25bbbd7a444b1624000389830d46ffdc5b809ee8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:43:58 -0700 Subject: [PATCH 090/181] libbpf: Remove assumptions about uniqueness of .rodata/.data/.bss maps Remove internal libbpf assumption that there can be only one .rodata, .data, and .bss map per BPF object. To achieve that, extend and generalize the scheme that was used for keeping track of relocation ELF sections. Now each ELF section has a temporary extra index that keeps track of logical type of ELF section (relocations, data, read-only data, BSS). 
Switch relocation to this scheme, as well as .rodata/.data/.bss handling. We don't yet allow multiple .rodata, .data, and .bss sections, but no libbpf internal code makes an assumption that there can be only one of each and thus they can be explicitly referenced by a single index. Next patches will actually allow multiple .rodata and .data sections. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-5-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 260 ++++++++++++++++++++++------------------- 1 file changed, 140 insertions(+), 120 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 74f7d9252d132..a7320d272b347 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -462,6 +462,20 @@ struct module_btf { int fd_array_idx; }; +enum sec_type { + SEC_UNUSED = 0, + SEC_RELO, + SEC_BSS, + SEC_DATA, + SEC_RODATA, +}; + +struct elf_sec_desc { + enum sec_type sec_type; + Elf64_Shdr *shdr; + Elf_Data *data; +}; + struct elf_state { int fd; const void *obj_buf; @@ -469,25 +483,16 @@ struct elf_state { Elf *elf; Elf64_Ehdr *ehdr; Elf_Data *symbols; - Elf_Data *data; - Elf_Data *rodata; - Elf_Data *bss; Elf_Data *st_ops_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; - struct { - Elf64_Shdr *shdr; - Elf_Data *data; - } *reloc_sects; - int nr_reloc_sects; + struct elf_sec_desc *secs; + int sec_cnt; int maps_shndx; int btf_maps_shndx; __u32 btf_maps_sec_btf_id; int text_shndx; int symbols_shndx; - int data_shndx; - int rodata_shndx; - int bss_shndx; int st_ops_shndx; }; @@ -506,10 +511,10 @@ struct bpf_object { struct extern_desc *externs; int nr_extern; int kconfig_map_idx; - int rodata_map_idx; bool loaded; bool has_subcalls; + bool has_rodata; struct bpf_gen *gen_loader; @@ -1168,12 +1173,8 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; - obj->efile.data_shndx = -1; - obj->efile.rodata_shndx = -1; - obj->efile.bss_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; - obj->rodata_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -1193,13 +1194,10 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; - obj->efile.data = NULL; - obj->efile.rodata = NULL; - obj->efile.bss = NULL; obj->efile.st_ops_data = NULL; - zfree(&obj->efile.reloc_sects); - obj->efile.nr_reloc_sects = 0; + zfree(&obj->efile.secs); + obj->efile.sec_cnt = 0; zclose(obj->efile.fd); obj->efile.obj_buf = NULL; obj->efile.obj_buf_sz = 0; @@ -1340,30 +1338,18 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) { int ret = -ENOENT; + Elf_Data *data; + Elf_Scn *scn; *size = 0; - if (!name) { + if (!name) return -EINVAL; - } else if (!strcmp(name, DATA_SEC)) { - if (obj->efile.data) - *size = obj->efile.data->d_size; - } else if (!strcmp(name, BSS_SEC)) { - if (obj->efile.bss) - *size = obj->efile.bss->d_size; - } else if (!strcmp(name, RODATA_SEC)) { - if (obj->efile.rodata) - *size = obj->efile.rodata->d_size; - } else if (!strcmp(name, STRUCT_OPS_SEC)) { - if (obj->efile.st_ops_data) - *size = obj->efile.st_ops_data->d_size; - } else { - Elf_Scn *scn = elf_sec_by_name(obj, name); - Elf_Data *data = elf_sec_data(obj, scn); - if (data) { - ret = 0; /* found it */ 
- *size = data->d_size; - } + scn = elf_sec_by_name(obj, name); + data = elf_sec_data(obj, scn); + if (data) { + ret = 0; /* found it */ + *size = data->d_size; } return *size ? 0 : ret; @@ -1516,34 +1502,39 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, static int bpf_object__init_global_data_maps(struct bpf_object *obj) { - int err; + struct elf_sec_desc *sec_desc; + int err = 0, sec_idx; /* * Populate obj->maps with libbpf internal maps. */ - if (obj->efile.data_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, - obj->efile.data_shndx, - obj->efile.data->d_buf, - obj->efile.data->d_size); - if (err) - return err; - } - if (obj->efile.rodata_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, - obj->efile.rodata_shndx, - obj->efile.rodata->d_buf, - obj->efile.rodata->d_size); - if (err) - return err; - - obj->rodata_map_idx = obj->nr_maps - 1; - } - if (obj->efile.bss_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, - obj->efile.bss_shndx, - NULL, - obj->efile.bss->d_size); + for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { + sec_desc = &obj->efile.secs[sec_idx]; + + switch (sec_desc->sec_type) { + case SEC_DATA: + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, + sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; + case SEC_RODATA: + obj->has_rodata = true; + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, + sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; + case SEC_BSS: + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, + sec_idx, + NULL, + sec_desc->data->d_size); + break; + default: + /* skip */ + break; + } if (err) return err; } @@ -3123,6 +3114,7 @@ static int cmp_progs(const void *_a, const void *_b) static int bpf_object__elf_collect(struct bpf_object *obj) { + struct elf_sec_desc *sec_desc; Elf *elf = obj->efile.elf; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; @@ -3132,6 +3124,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf_Scn *scn; Elf64_Shdr *sh; + /* ELF section indices are 1-based, so allocate +1 element to keep + * indexing simple. Also include 0th invalid section into sec_cnt for + * simpler and more traditional iteration logic. 
+ */ + obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; + obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); + if (!obj->efile.secs) + return -ENOMEM; + /* a bunch of ELF parsing functionality depends on processing symbols, * so do the first pass and find the symbol table */ @@ -3151,8 +3152,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!data) return -LIBBPF_ERRNO__FORMAT; + idx = elf_ndxscn(scn); + obj->efile.symbols = data; - obj->efile.symbols_shndx = elf_ndxscn(scn); + obj->efile.symbols_shndx = idx; obj->efile.strtabidx = sh->sh_link; } } @@ -3165,7 +3168,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - idx++; + idx = elf_ndxscn(scn); + sec_desc = &obj->efile.secs[idx]; sh = elf_sec_hdr(obj, scn); if (!sh) @@ -3213,11 +3217,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (err) return err; } else if (strcmp(name, DATA_SEC) == 0) { - obj->efile.data = data; - obj->efile.data_shndx = idx; + sec_desc->sec_type = SEC_DATA; + sec_desc->shdr = sh; + sec_desc->data = data; } else if (strcmp(name, RODATA_SEC) == 0) { - obj->efile.rodata = data; - obj->efile.rodata_shndx = idx; + sec_desc->sec_type = SEC_RODATA; + sec_desc->shdr = sh; + sec_desc->data = data; } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; @@ -3226,33 +3232,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj) idx, name); } } else if (sh->sh_type == SHT_REL) { - int nr_sects = obj->efile.nr_reloc_sects; - void *sects = obj->efile.reloc_sects; - int sec = sh->sh_info; /* points to other section */ + int targ_sec_idx = sh->sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec) && + if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", - idx, name, sec, - elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); + idx, name, targ_sec_idx, + elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: ""); continue; } - sects = libbpf_reallocarray(sects, nr_sects + 1, - sizeof(*obj->efile.reloc_sects)); - if (!sects) - return -ENOMEM; - - obj->efile.reloc_sects = sects; - obj->efile.nr_reloc_sects++; - - obj->efile.reloc_sects[nr_sects].shdr = sh; - obj->efile.reloc_sects[nr_sects].data = data; + sec_desc->sec_type = SEC_RELO; + sec_desc->shdr = sh; + sec_desc->data = data; } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { - obj->efile.bss = data; - obj->efile.bss_shndx = idx; + sec_desc->sec_type = SEC_BSS; + sec_desc->shdr = sh; + sec_desc->data = data; } else { pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, (size_t)sh->sh_size); @@ -3732,9 +3730,14 @@ bpf_object__find_program_by_name(const struct bpf_object *obj, static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.data_shndx || - shndx == obj->efile.bss_shndx || - shndx == obj->efile.rodata_shndx; + switch (obj->efile.secs[shndx].sec_type) { + case SEC_BSS: + case SEC_DATA: + case SEC_RODATA: + return true; + default: + return false; + } } static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, @@ -3747,16 +3750,19 @@ static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, static enum libbpf_map_type bpf_object__section_to_libbpf_map_type(const 
struct bpf_object *obj, int shndx) { - if (shndx == obj->efile.data_shndx) - return LIBBPF_MAP_DATA; - else if (shndx == obj->efile.bss_shndx) + if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; + + switch (obj->efile.secs[shndx].sec_type) { + case SEC_BSS: return LIBBPF_MAP_BSS; - else if (shndx == obj->efile.rodata_shndx) + case SEC_DATA: + return LIBBPF_MAP_DATA; + case SEC_RODATA: return LIBBPF_MAP_RODATA; - else if (shndx == obj->efile.symbols_shndx) - return LIBBPF_MAP_KCONFIG; - else + default: return LIBBPF_MAP_UNSPEC; + } } static int bpf_program__record_reloc(struct bpf_program *prog, @@ -3892,7 +3898,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; - if (map->libbpf_type != type) + if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) continue; pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", prog->name, map_idx, map->name, map->sec_idx, @@ -6205,10 +6211,18 @@ static int bpf_object__collect_relos(struct bpf_object *obj) { int i, err; - for (i = 0; i < obj->efile.nr_reloc_sects; i++) { - Elf64_Shdr *shdr = obj->efile.reloc_sects[i].shdr; - Elf_Data *data = obj->efile.reloc_sects[i].data; - int idx = shdr->sh_info; + for (i = 0; i < obj->efile.sec_cnt; i++) { + struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; + Elf64_Shdr *shdr; + Elf_Data *data; + int idx; + + if (sec_desc->sec_type != SEC_RELO) + continue; + + shdr = sec_desc->shdr; + data = sec_desc->data; + idx = shdr->sh_info; if (shdr->sh_type != SHT_REL) { pr_warn("internal error at %d\n", __LINE__); @@ -6331,6 +6345,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) { struct bpf_prog_load_params load_attr = {}; + struct bpf_object *obj = prog->obj; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL; @@ -6351,7 +6366,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_type = prog->type; load_attr.expected_attach_type = prog->expected_attach_type; - if (kernel_supports(prog->obj, FEAT_PROG_NAME)) + if (kernel_supports(obj, FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insn_cnt = insns_cnt; @@ -6364,8 +6379,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_ifindex = prog->prog_ifindex; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) { + btf_fd = bpf_object__btf_fd(obj); + if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -6376,7 +6391,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; - load_attr.fd_array = prog->obj->fd_array; + load_attr.fd_array = obj->fd_array; /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->preload_fn) { @@ -6388,9 +6403,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } } - if (prog->obj->gen_loader) { - bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, - prog - prog->obj->programs); + if (obj->gen_loader) { + bpf_gen__prog_load(obj->gen_loader, &load_attr, + 
prog - obj->programs); *pfd = -1; return 0; } @@ -6411,16 +6426,21 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, if (log_buf && load_attr.log_level) pr_debug("verifier log:\n%s", log_buf); - if (prog->obj->rodata_map_idx >= 0 && - kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) { - struct bpf_map *rodata_map = - &prog->obj->maps[prog->obj->rodata_map_idx]; + if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { + struct bpf_map *map; + int i; + + for (i = 0; i < obj->nr_maps; i++) { + map = &prog->obj->maps[i]; + if (map->libbpf_type != LIBBPF_MAP_RODATA) + continue; - if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to bind .rodata map: %s\n", - prog->name, cp); - /* Don't fail hard if can't bind rodata. */ + if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': failed to bind .rodata map: %s\n", + prog->name, cp); + /* Don't fail hard if can't bind rodata. */ + } } } From 8654b4d35e6c915ef456c14320ec8720383e81a7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:43:59 -0700 Subject: [PATCH 091/181] bpftool: Support multiple .rodata/.data internal maps in skeleton Remove the assumption about only single instance of each of .rodata and .data internal maps. Nothing changes for '.rodata' and '.data' maps, but new '.rodata.something' map will get 'rodata_something' section in BPF skeleton for them (as well as having struct bpf_map * field in maps section with the same field name). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-6-andrii@kernel.org --- tools/bpf/bpftool/gen.c | 107 ++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 47 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b2ffc18eafc18..59afe9a2ca3c0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -33,6 +33,11 @@ static void sanitize_identifier(char *name) name[i] = '_'; } +static bool str_has_prefix(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} + static bool str_has_suffix(const char *str, const char *suffix) { size_t i, n1 = strlen(str), n2 = strlen(suffix); @@ -67,23 +72,47 @@ static void get_header_guard(char *guard, const char *obj_name) guard[i] = toupper(guard[i]); } -static const char *get_map_ident(const struct bpf_map *map) +static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz) { + static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; const char *name = bpf_map__name(map); + int i, n; + + if (!bpf_map__is_internal(map)) { + snprintf(buf, buf_sz, "%s", name); + return true; + } + + for (i = 0, n = ARRAY_SIZE(sfxs); i < n; i++) { + const char *sfx = sfxs[i], *p; + + p = strstr(name, sfx); + if (p) { + snprintf(buf, buf_sz, "%s", p + 1); + sanitize_identifier(buf); + return true; + } + } - if (!bpf_map__is_internal(map)) - return name; - - if (str_has_suffix(name, ".data")) - return "data"; - else if (str_has_suffix(name, ".rodata")) - return "rodata"; - else if (str_has_suffix(name, ".bss")) - return "bss"; - else if (str_has_suffix(name, ".kconfig")) - return "kconfig"; - else - return NULL; + return false; +} + +static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) +{ + static const char *pfxs[] = 
{ ".data", ".rodata", ".bss", ".kconfig" }; + int i, n; + + for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { + const char *pfx = pfxs[i]; + + if (str_has_prefix(sec_name, pfx)) { + snprintf(buf, buf_sz, "%s", sec_name + 1); + sanitize_identifier(buf); + return true; + } + } + + return false; } static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args) @@ -100,24 +129,14 @@ static int codegen_datasec_def(struct bpf_object *obj, const char *sec_name = btf__name_by_offset(btf, sec->name_off); const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); - const char *sec_ident; - char var_ident[256]; + char var_ident[256], sec_ident[256]; bool strip_mods = false; - if (strcmp(sec_name, ".data") == 0) { - sec_ident = "data"; - strip_mods = true; - } else if (strcmp(sec_name, ".bss") == 0) { - sec_ident = "bss"; - strip_mods = true; - } else if (strcmp(sec_name, ".rodata") == 0) { - sec_ident = "rodata"; - strip_mods = true; - } else if (strcmp(sec_name, ".kconfig") == 0) { - sec_ident = "kconfig"; - } else { + if (!get_datasec_ident(sec_name, sec_ident, sizeof(sec_ident))) return 0; - } + + if (strcmp(sec_name, ".kconfig") != 0) + strip_mods = true; printf(" struct %s__%s {\n", obj_name, sec_ident); for (i = 0; i < vlen; i++, sec_var++) { @@ -385,6 +404,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) { struct bpf_program *prog; struct bpf_map *map; + char ident[256]; codegen("\ \n\ @@ -405,10 +425,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) } bpf_object__for_each_map(map, obj) { - const char *ident; - - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (bpf_map__is_internal(map) && (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) @@ -432,6 +449,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h struct bpf_object_load_attr load_attr = {}; DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); struct bpf_map *map; + char ident[256]; int err = 0; err = bpf_object__gen_loader(obj, &opts); @@ -477,12 +495,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h ", obj_name, opts.data_sz); bpf_object__for_each_map(map, obj) { - const char *ident; const void *mmap_data = NULL; size_t mmap_size = 0; - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (!bpf_map__is_internal(map) || @@ -544,15 +560,15 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h return err; \n\ ", obj_name); bpf_object__for_each_map(map, obj) { - const char *ident, *mmap_flags; + const char *mmap_flags; - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (!bpf_map__is_internal(map) || !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) continue; + if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) mmap_flags = "PROT_READ"; else @@ -602,7 +618,8 @@ static int do_skeleton(int argc, char **argv) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; - const char *file, *ident; + const char *file; + char ident[256]; struct bpf_program *prog; int fd, err = -1; struct bpf_map *map; @@ -673,8 +690,7 @@ static int do_skeleton(int argc, char **argv) } bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - if (!ident) { + if (!get_map_ident(map, ident, sizeof(ident))) { 
p_err("ignoring unrecognized internal map '%s'...", bpf_map__name(map)); continue; @@ -727,8 +743,7 @@ static int do_skeleton(int argc, char **argv) if (map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (use_loader) printf("\t\tstruct bpf_map_desc %s;\n", ident); @@ -897,9 +912,7 @@ static int do_skeleton(int argc, char **argv) ); i = 0; bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; codegen("\ From ef9356d392f980b3b192668fa05b2eaaad127da1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:44:00 -0700 Subject: [PATCH 092/181] bpftool: Improve skeleton generation for data maps without DATASEC type It can happen that some data sections (e.g., .rodata.cst16, containing compiler populated string constants) won't have a corresponding BTF DATASEC type. Now that libbpf supports .rodata.* and .data.* sections, situation like that will cause invalid BPF skeleton to be generated that won't compile successfully, as some parts of skeleton would assume memory-mapped struct definitions for each special data section. Fix this by generating empty struct definitions for such data sections. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-7-andrii@kernel.org --- tools/bpf/bpftool/gen.c | 51 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 59afe9a2ca3c0..c446405ab73fe 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -213,22 +213,61 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) struct btf *btf = bpf_object__btf(obj); int n = btf__get_nr_types(btf); struct btf_dump *d; + struct bpf_map *map; + const struct btf_type *sec; + char sec_ident[256], map_ident[256]; int i, err = 0; d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); if (IS_ERR(d)) return PTR_ERR(d); - for (i = 1; i <= n; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); + bpf_object__for_each_map(map, obj) { + /* only generate definitions for memory-mapped internal maps */ + if (!bpf_map__is_internal(map)) + continue; + if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + continue; - if (!btf_is_datasec(t)) + if (!get_map_ident(map, map_ident, sizeof(map_ident))) continue; - err = codegen_datasec_def(obj, btf, d, t, obj_name); - if (err) - goto out; + sec = NULL; + for (i = 1; i <= n; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + const char *name; + + if (!btf_is_datasec(t)) + continue; + + name = btf__str_by_offset(btf, t->name_off); + if (!get_datasec_ident(name, sec_ident, sizeof(sec_ident))) + continue; + + if (strcmp(sec_ident, map_ident) == 0) { + sec = t; + break; + } + } + + /* In some cases (e.g., sections like .rodata.cst16 containing + * compiler allocated string constants only) there will be + * special internal maps with no corresponding DATASEC BTF + * type. In such case, generate empty structs for each such + * map. It will still be memory-mapped and its contents + * accessible from user-space through BPF skeleton. 
+ */ + if (!sec) { + printf(" struct %s__%s {\n", obj_name, map_ident); + printf(" } *%s;\n", map_ident); + } else { + err = codegen_datasec_def(obj, btf, d, sec, obj_name); + if (err) + goto out; + } } + + out: btf_dump__free(d); return err; From aed659170a3171e425913ae259d46396fb9c10ef Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:44:01 -0700 Subject: [PATCH 093/181] libbpf: Support multiple .rodata.* and .data.* BPF maps Add support for having multiple .rodata and .data data sections ([0]). .rodata/.data are supported like the usual, but now also .rodata. and .data. are also supported. Each such section will get its own backing BPF_MAP_TYPE_ARRAY, just like .rodata and .data. Multiple .bss maps are not supported, as the whole '.bss' name is confusing and might be deprecated soon, as well as user would need to specify custom ELF section with SEC() attribute anyway, so might as well stick to just .data.* and .rodata.* convention. User-visible map name for such new maps is going to be just their ELF section names. [0] https://github.com/libbpf/libbpf/issues/274 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-8-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++--------- 1 file changed, 101 insertions(+), 29 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a7320d272b347..031f4611e0ffe 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -370,15 +370,14 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; -static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = DATA_SEC, - [LIBBPF_MAP_BSS] = BSS_SEC, - [LIBBPF_MAP_RODATA] = RODATA_SEC, - [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, -}; - struct bpf_map { char *name; + /* real_name is defined for special internal maps (.rodata*, + * .data*, .bss, .kconfig) and preserves their original ELF section + * name. This is important to be be able to find corresponding BTF + * DATASEC information. + */ + char *real_name; int fd; int sec_idx; size_t sec_offset; @@ -1429,17 +1428,55 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map) return map_sz; } -static char *internal_map_name(struct bpf_object *obj, - enum libbpf_map_type type) +static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; - const char *sfx = libbpf_type_to_btf_name[type]; - int sfx_len = max((size_t)7, strlen(sfx)); - int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, - strlen(obj->name)); + int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); + + /* This is one of the more confusing parts of libbpf for various + * reasons, some of which are historical. The original idea for naming + * internal names was to include as much of BPF object name prefix as + * possible, so that it can be distinguished from similar internal + * maps of a different BPF object. + * As an example, let's say we have bpf_object named 'my_object_name' + * and internal map corresponding to '.rodata' ELF section. The final + * map name advertised to user and to the kernel will be + * 'my_objec.rodata', taking first 8 characters of object name and + * entire 7 characters of '.rodata'. 
+ * Somewhat confusingly, if internal map ELF section name is shorter + * than 7 characters, e.g., '.bss', we still reserve 7 characters + * for the suffix, even though we only have 4 actual characters, and + * resulting map will be called 'my_objec.bss', not even using all 15 + * characters allowed by the kernel. Oh well, at least the truncated + * object name is somewhat consistent in this case. But if the map + * name is '.kconfig', we'll still have entirety of '.kconfig' added + * (8 chars) and thus will be left with only first 7 characters of the + * object name ('my_obje'). Happy guessing, user, that the final map + * name will be "my_obje.kconfig". + * Now, with libbpf starting to support arbitrarily named .rodata.* + * and .data.* data sections, it's possible that ELF section name is + * longer than allowed 15 chars, so we now need to be careful to take + * only up to 15 first characters of ELF name, taking no BPF object + * name characters at all. So '.rodata.abracadabra' will result in + * '.rodata.abracad' kernel and user-visible name. + * We need to keep this convoluted logic intact for .data, .bss and + * .rodata maps, but for new custom .data.custom and .rodata.custom + * maps we use their ELF names as is, not prepending bpf_object name + * in front. We still need to truncate them to 15 characters for the + * kernel. Full name can be recovered for such maps by using DATASEC + * BTF type associated with such map's value type, though. + */ + if (sfx_len >= BPF_OBJ_NAME_LEN) + sfx_len = BPF_OBJ_NAME_LEN - 1; + + /* if there are two or more dots in map name, it's a custom dot map */ + if (strchr(real_name + 1, '.') != NULL) + pfx_len = 0; + else + pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, - sfx_len, libbpf_type_to_btf_name[type]); + sfx_len, real_name); /* sanitise map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) @@ -1451,7 +1488,7 @@ static char *internal_map_name(struct bpf_object *obj, static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, void *data, size_t data_sz) + const char *real_name, int sec_idx, void *data, size_t data_sz) { struct bpf_map_def *def; struct bpf_map *map; @@ -1464,9 +1501,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - map->name = internal_map_name(obj, type); - if (!map->name) { - pr_warn("failed to alloc map name\n"); + map->real_name = strdup(real_name); + map->name = internal_map_name(obj, real_name); + if (!map->real_name || !map->name) { + zfree(&map->real_name); + zfree(&map->name); return -ENOMEM; } @@ -1489,6 +1528,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->mmaped = NULL; pr_warn("failed to alloc map '%s' content buffer: %d\n", map->name, err); + zfree(&map->real_name); zfree(&map->name); return err; } @@ -1503,6 +1543,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, static int bpf_object__init_global_data_maps(struct bpf_object *obj) { struct elf_sec_desc *sec_desc; + const char *sec_name; int err = 0, sec_idx; /* @@ -1513,21 +1554,24 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) switch (sec_desc->sec_type) { case SEC_DATA: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); err = 
bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, - sec_idx, + sec_name, sec_idx, sec_desc->data->d_buf, sec_desc->data->d_size); break; case SEC_RODATA: obj->has_rodata = true; + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, - sec_idx, + sec_name, sec_idx, sec_desc->data->d_buf, sec_desc->data->d_size); break; case SEC_BSS: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, - sec_idx, + sec_name, sec_idx, NULL, sec_desc->data->d_size); break; @@ -1831,7 +1875,7 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, - obj->efile.symbols_shndx, + ".kconfig", obj->efile.symbols_shndx, NULL, map_sz); if (err) return err; @@ -1931,7 +1975,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map->name = strdup(map_name); if (!map->name) { - pr_warn("failed to alloc map name\n"); + pr_warn("map '%s': failed to alloc map name\n", map_name); return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); @@ -3216,11 +3260,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) err = bpf_object__add_programs(obj, data, name, idx); if (err) return err; - } else if (strcmp(name, DATA_SEC) == 0) { + } else if (strcmp(name, DATA_SEC) == 0 || + str_has_pfx(name, DATA_SEC ".")) { sec_desc->sec_type = SEC_DATA; sec_desc->shdr = sh; sec_desc->data = data; - } else if (strcmp(name, RODATA_SEC) == 0) { + } else if (strcmp(name, RODATA_SEC) == 0 || + str_has_pfx(name, RODATA_SEC ".")) { sec_desc->sec_type = SEC_RODATA; sec_desc->shdr = sh; sec_desc->data = data; @@ -4060,8 +4106,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) * LLVM annotates global data differently in BTF, that is, * only as '.data', '.bss' or '.rodata'. */ - ret = btf__find_by_name(obj->btf, - libbpf_type_to_btf_name[map->libbpf_type]); + ret = btf__find_by_name(obj->btf, map->real_name); } if (ret < 0) return ret; @@ -7831,6 +7876,7 @@ static void bpf_map__destroy(struct bpf_map *map) } zfree(&map->name); + zfree(&map->real_name); zfree(&map->pin_path); if (map->fd >= 0) @@ -8755,9 +8801,30 @@ const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) return map ? &map->def : libbpf_err_ptr(-EINVAL); } +static bool map_uses_real_name(const struct bpf_map *map) +{ + /* Since libbpf started to support custom .data.* and .rodata.* maps, + * their user-visible name differs from kernel-visible name. Users see + * such map's corresponding ELF section name as a map name. + * This check distinguishes .data/.rodata from .data.* and .rodata.* + * maps to know which name has to be returned to the user. + */ + if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) + return true; + if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) + return true; + return false; +} + const char *bpf_map__name(const struct bpf_map *map) { - return map ? 
map->name : NULL; + if (!map) + return NULL; + + if (map_uses_real_name(map)) + return map->real_name; + + return map->name; } enum bpf_map_type bpf_map__type(const struct bpf_map *map) @@ -8976,7 +9043,12 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) struct bpf_map *pos; bpf_object__for_each_map(pos, obj) { - if (pos->name && !strcmp(pos->name, name)) + if (map_uses_real_name(pos)) { + if (strcmp(pos->real_name, name) == 0) + return pos; + continue; + } + if (strcmp(pos->name, name) == 0) return pos; } return errno = ENOENT, NULL; From 30c5bd96476ced0d3e08372be5186ff7f421d10c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:44:02 -0700 Subject: [PATCH 094/181] selftests/bpf: Demonstrate use of custom .rodata/.data sections Enhance existing selftests to demonstrate the use of custom .data/.rodata sections. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-9-andrii@kernel.org --- .../selftests/bpf/prog_tests/skeleton.c | 29 +++++++++++++++++++ .../selftests/bpf/progs/test_skeleton.c | 18 ++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index fe1e204a65c6d..180afd632f4ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -16,10 +16,13 @@ void test_skeleton(void) struct test_skeleton* skel; struct test_skeleton__bss *bss; struct test_skeleton__data *data; + struct test_skeleton__data_dyn *data_dyn; struct test_skeleton__rodata *rodata; + struct test_skeleton__rodata_dyn *rodata_dyn; struct test_skeleton__kconfig *kcfg; const void *elf_bytes; size_t elf_bytes_sz = 0; + int i; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -30,7 +33,12 @@ void test_skeleton(void) bss = skel->bss; data = skel->data; + data_dyn = skel->data_dyn; rodata = skel->rodata; + rodata_dyn = skel->rodata_dyn; + + ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name"); + ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name"); /* validate values are pre-initialized correctly */ CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1); @@ -46,6 +54,12 @@ void test_skeleton(void) CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0); CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0); + ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz"); + for (i = 0; i < 4; i++) + ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr"); + for (i = 0; i < 4; i++) + ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr"); + /* validate we can pre-setup global variables, even in .bss */ data->in1 = 10; data->in2 = 11; @@ -53,6 +67,10 @@ void test_skeleton(void) bss->in4 = 13; rodata->in.in6 = 14; + rodata_dyn->in_dynarr_sz = 4; + for (i = 0; i < 4; i++) + rodata_dyn->in_dynarr[i] = i + 10; + err = test_skeleton__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; @@ -64,6 +82,10 @@ void test_skeleton(void) CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL); CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14); + ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz"); + for (i = 0; i < 4; i++) + ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr"); + /* now set new values and 
attach to get them into outX variables */ data->in1 = 1; data->in2 = 2; @@ -73,6 +95,8 @@ void test_skeleton(void) bss->in5.b = 6; kcfg = skel->kconfig; + skel->data_read_mostly->read_mostly_var = 123; + err = test_skeleton__attach(skel); if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; @@ -93,6 +117,11 @@ void test_skeleton(void) CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2", "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION); + for (i = 0; i < 4; i++) + ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr"); + + ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 441fa1c552c8d..1b1187d2967bd 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -5,6 +5,8 @@ #include #include +#define __read_mostly SEC(".data.read_mostly") + struct s { int a; long long b; @@ -40,9 +42,20 @@ int kern_ver = 0; struct s out5 = {}; + +const volatile int in_dynarr_sz SEC(".rodata.dyn"); +const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 }; + +int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; + +int read_mostly_var __read_mostly; +int out_mostly_var; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { + int i; + out1 = in1; out2 = in2; out3 = in3; @@ -53,6 +66,11 @@ int handler(const void *ctx) bpf_syscall = CONFIG_BPF_SYSCALL; kern_ver = LINUX_KERNEL_VERSION; + for (i = 0; i < in_dynarr_sz; i++) + out_dynarr[i] = in_dynarr[i]; + + out_mostly_var = read_mostly_var; + return 0; } From 26071635ac5ecd8276bf3bdfc3ea1128c93ac722 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:44:03 -0700 Subject: [PATCH 095/181] libbpf: Simplify look up by name of internal maps Map name that's assigned to internal maps (.rodata, .data, .bss, etc) consist of a small prefix of bpf_object's name and ELF section name as a suffix. This makes it hard for users to "guess" the name to use for looking up by name with bpf_object__find_map_by_name() API. One proposal was to drop object name prefix from the map name and just use ".rodata", ".data", etc, names. One downside called out was that when multiple BPF applications are active on the host, it will be hard to distinguish between multiple instances of .rodata and know which BPF object (app) they belong to. Having few first characters, while quite limiting, still can give a bit of a clue, in general. Note, though, that btf_value_type_id for such global data maps (ARRAY) points to DATASEC type, which encodes full ELF name, so tools like bpftool can take advantage of this fact to "recover" full original name of the map. This is also the reason why for custom .data.* and .rodata.* maps libbpf uses only their ELF names and doesn't prepend object name at all. Another downside of such approach is that it is not backwards compatible and, among direct use of bpf_object__find_map_by_name() API, will break any BPF skeleton generated using bpftool that was compiled with older libbpf version. Instead of causing all this pain, libbpf will still generate map name using a combination of object name and ELF section name, but it will allow looking such maps up by their natural names, which correspond to their respective ELF section names. 
This means non-truncated ELF section names longer than 15 characters are going to be expected and supported. With such set up, we get the best of both worlds: leave small bits of a clue about BPF application that instantiated such maps, as well as making it easy for user apps to lookup such maps at runtime. In this sense it closes corresponding libbpf 1.0 issue ([0]). BPF skeletons will continue using full names for lookups. [0] Closes: https://github.com/libbpf/libbpf/issues/275 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-10-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 031f4611e0ffe..db6e480148394 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9043,6 +9043,16 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) struct bpf_map *pos; bpf_object__for_each_map(pos, obj) { + /* if it's a special internal map name (which always starts + * with dot) then check if that special name matches the + * real map name (ELF section name) + */ + if (name[0] == '.') { + if (pos->real_name && strcmp(pos->real_name, name) == 0) + return pos; + continue; + } + /* otherwise map name has to be an exact match */ if (map_uses_real_name(pos)) { if (strcmp(pos->real_name, name) == 0) return pos; From 4f2511e1990985103929ab799fb3ebca81969b77 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Oct 2021 18:44:04 -0700 Subject: [PATCH 096/181] selftests/bpf: Switch to ".bss"/".rodata"/".data" lookups for internal maps Utilize libbpf's feature of allowing to lookup internal maps by their ELF section names. No need to guess or calculate the exact truncated prefix taken from the object name. 
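For example (an illustrative sketch mirroring the hunks below, not new API surface):

	/* before: caller had to hard-code the truncated object-name prefix */
	bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");

	/* after: the plain ELF section name works regardless of object name */
	bss_map = bpf_object__find_map_by_name(skel->obj, ".bss");
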
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021014404.2635234-11-andrii@kernel.org --- .../testing/selftests/bpf/prog_tests/core_autosize.c | 2 +- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 +- tools/testing/selftests/bpf/prog_tests/global_data.c | 11 +++++++++-- .../selftests/bpf/prog_tests/global_data_init.c | 2 +- tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 2 +- tools/testing/selftests/bpf/prog_tests/rdonly_maps.c | 2 +- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 3d4b2a358d476..2a0dac6394ef4 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -163,7 +163,7 @@ void test_core_autosize(void) usleep(1); - bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + bss_map = bpf_object__find_map_by_name(skel->obj, ".bss"); if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 763302e63a294..cc50f8feeca3d 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -867,7 +867,7 @@ void test_core_reloc(void) goto cleanup; } - data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); + data_map = bpf_object__find_map_by_name(obj, ".bss"); if (CHECK(!data_map, "find_data_map", "data map not found\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 9efa7e50eab27..afd8639f9a947 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -103,11 +103,18 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration) static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) { int err = -ENOMEM, map_fd, zero = 0; - struct bpf_map *map; + struct bpf_map *map, *map2; __u8 *buff; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); - if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) + if (!ASSERT_OK_PTR(map, "map")) + return; + if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal")) + return; + + /* ensure we can lookup internal maps by their ELF names */ + map2 = bpf_object__find_map_by_name(obj, ".rodata"); + if (!ASSERT_EQ(map, map2, "same_maps")) return; map_fd = bpf_map__fd(map); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index ee46b11f1f9a2..1db86eab101b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -16,7 +16,7 @@ void test_global_data_init(void) if (CHECK_FAIL(err)) return; - map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); + map = bpf_object__find_map_by_name(obj, ".rodata"); if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index 032a322d51f2a..01e51d16c8b8d 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -93,7 +93,7 @@ void serial_test_kfree_skb(void) if (CHECK(!fexit, "find_prog", 
"prog eth_type_trans not found\n")) goto close_prog; - global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); + global_data = bpf_object__find_map_by_name(obj2, ".bss"); if (CHECK(!global_data, "find global data", "not found\n")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index 5f9eaa3ab5847..fd5d2ddfb0627 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -37,7 +37,7 @@ void test_rdonly_maps(void) if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) goto cleanup; - bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss"); + bss_map = bpf_object__find_map_by_name(obj, ".bss"); if (CHECK(!bss_map, "find_bss_map", "failed\n")) goto cleanup; From fae1b05e6f0acf116f6450535b0e1c13051102d3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 12:25:02 -0700 Subject: [PATCH 097/181] libbpf: Fix the use of aligned attribute Building libbpf sources out of kernel tree (in Github repo) we run into compilation error due to unknown __aligned attribute. It must be coming from some kernel header, which is not available to Github sources. Use explicit __attribute__((aligned(16))) instead. Fixes: 961632d54163 ("libbpf: Fix dumping non-aligned __int128") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211022192502.2975553-1-andrii@kernel.org --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e9e5801ece4ca..3c19644b5fad2 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1676,7 +1676,7 @@ static int btf_dump_int_data(struct btf_dump *d, { __u8 encoding = btf_int_encoding(t); bool sign = encoding & BTF_INT_SIGNED; - char buf[16] __aligned(16); + char buf[16] __attribute__((aligned(16))); int sz = t->size; if (sz == 0 || sz > sizeof(buf)) { From 57385ae31ff0ffa6e9c9ae39206740efdc7f5972 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 13:13:42 -0700 Subject: [PATCH 098/181] selftests/bpf: Make perf_buffer selftests work on 4.9 kernel again Recent change to use tp/syscalls/sys_enter_nanosleep for perf_buffer selftests causes this selftest to fail on 4.9 kernel in libbpf CI ([0]): libbpf: prog 'handle_sys_enter': failed to attach to perf_event FD 6: Invalid argument libbpf: prog 'handle_sys_enter': failed to attach to tracepoint 'syscalls/sys_enter_nanosleep': Invalid argument It's not exactly clear why, because perf_event itself is created for this tracepoint, but I can't even compile 4.9 kernel locally, so it's hard to figure this out. If anyone has better luck and would like to help investigating this, I'd really appreciate this. For now, unblock CI by switching back to raw_syscalls/sys_enter, but reduce amount of unnecessary samples emitted by filter by process ID. Use explicit ARRAY map for that to make it work on 4.9 as well, because global data isn't yet supported there. 
Fixes: aa274f98b269 ("selftests/bpf: Fix possible/online index mismatch in perf_buffer test") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211022201342.3490692-1-andrii@kernel.org --- .../selftests/bpf/prog_tests/perf_buffer.c | 5 +++++ .../selftests/bpf/progs/test_perf_buffer.c | 20 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 0b0cd045979b5..4e32f3586a75c 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -46,6 +46,7 @@ int trigger_on_cpu(int cpu) void serial_test_perf_buffer(void) { int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; + int zero = 0, my_pid = getpid(); struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; cpu_set_t cpu_seen; @@ -71,6 +72,10 @@ void serial_test_perf_buffer(void) if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) goto out_close; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0); + if (!ASSERT_OK(err, "my_pid_update")) + goto out_close; + /* attach probe */ err = test_perf_buffer__attach(skel); if (CHECK(err, "attach_kprobe", "err %d\n", err)) diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index a08874c5bdf2f..17d5b67744d55 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -6,20 +6,36 @@ #include #include +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} my_pid_map SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __type(key, int); __type(value, int); } perf_buf_map SEC(".maps"); -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("tp/raw_syscalls/sys_enter") int handle_sys_enter(void *ctx) { + int zero = 0, *my_pid, cur_pid; int cpu = bpf_get_smp_processor_id(); + my_pid = bpf_map_lookup_elem(&my_pid_map, &zero); + if (!my_pid) + return 1; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + if (cur_pid != *my_pid) + return 1; + bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU, &cpu, sizeof(cpu)); - return 0; + return 1; } char _license[] SEC("license") = "GPL"; From 1000298c76830bc291358e98e8fa5baa3baa9b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Fri, 22 Oct 2021 15:20:35 -0500 Subject: [PATCH 099/181] libbpf: Fix memory leak in btf__dedup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free btf_dedup if btf_ensure_modifiable() returns error. 
Fixes: 919d2b1dbb07 ("libbpf: Allow modification of BTF and add btf__add_str API") Signed-off-by: Mauricio Vásquez Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022202035.48868-1-mauricio@kinvolk.io --- tools/lib/bpf/btf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3a01c4b7f36ab..85705c10f7b25 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2991,8 +2991,10 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, return libbpf_err(-EINVAL); } - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + if (btf_ensure_modifiable(btf)) { + err = -ENOMEM; + goto done; + } err = btf_dedup_prep(d); if (err) { From 6a886de070fad850d6cb74a787c9ed017303d9ac Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Fri, 22 Oct 2021 21:06:19 +0800 Subject: [PATCH 100/181] libbpf: Add btf__type_cnt() and btf__raw_data() APIs Add btf__type_cnt() and btf__raw_data() APIs and deprecate btf__get_nr_types() and btf__get_raw_data() since the old APIs don't follow the libbpf naming convention for getters which omit 'get' in the name (see [0]). btf__raw_data() is just an alias to the existing btf__get_raw_data(). btf__type_cnt() now returns the number of all types of the BTF object including 'void'. [0] Closes: https://github.com/libbpf/libbpf/issues/279 Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022130623.1548429-2-hengqi.chen@gmail.com --- tools/lib/bpf/btf.c | 36 ++++++++++++++++++++++-------------- tools/lib/bpf/btf.h | 4 ++++ tools/lib/bpf/btf_dump.c | 8 ++++---- tools/lib/bpf/libbpf.c | 36 ++++++++++++++++++------------------ tools/lib/bpf/libbpf.map | 2 ++ tools/lib/bpf/linker.c | 28 ++++++++++++++-------------- 6 files changed, 64 insertions(+), 50 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 85705c10f7b25..66997d985ff8e 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -57,7 +57,7 @@ struct btf { * representation is broken up into three independently allocated * memory regions to be able to modify them independently.
* raw_data is nulled out at that point, but can be later allocated - * and cached again if user calls btf__get_raw_data(), at which point + * and cached again if user calls btf__raw_data(), at which point * raw_data will contain a contiguous copy of header, types, and * strings: * @@ -435,6 +435,11 @@ __u32 btf__get_nr_types(const struct btf *btf) return btf->start_id + btf->nr_types - 1; } +__u32 btf__type_cnt(const struct btf *btf) +{ + return btf->start_id + btf->nr_types; +} + const struct btf *btf__base_btf(const struct btf *btf) { return btf->base_btf; @@ -466,8 +471,8 @@ static int determine_ptr_size(const struct btf *btf) if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_int(t)) continue; @@ -684,12 +689,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) __s32 btf__find_by_name(const struct btf *btf, const char *type_name) { - __u32 i, nr_types = btf__get_nr_types(btf); + __u32 i, nr_types = btf__type_cnt(btf); if (!strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = 1; i < nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name = btf__name_by_offset(btf, t->name_off); @@ -703,12 +708,12 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, const char *type_name, __u32 kind) { - __u32 i, nr_types = btf__get_nr_types(btf); + __u32 i, nr_types = btf__type_cnt(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = start_id; i <= nr_types; i++) { + for (i = start_id; i < nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; @@ -781,7 +786,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_id = btf__type_cnt(base_btf); btf->start_str_off = base_btf->hdr->str_len; } @@ -831,7 +836,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_id = btf__type_cnt(base_btf); btf->start_str_off = base_btf->hdr->str_len; } @@ -1224,7 +1229,7 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi return NULL; } -const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) +const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) { struct btf *btf = (struct btf *)btf_ro; __u32 data_sz; @@ -1232,7 +1237,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); if (!data) - return errno = -ENOMEM, NULL; + return errno = ENOMEM, NULL; btf->raw_size = data_sz; if (btf->swapped_endian) @@ -1243,6 +1248,9 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) return data; } +__attribute__((alias("btf__raw_data"))) +const void *btf__get_raw_data(const struct btf *btf, __u32 *size); + const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->start_str_off) @@ -1651,7 +1659,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) old_strs_len = btf->hdr->str_len; data_sz = src_btf->hdr->type_len; - cnt = btf__get_nr_types(src_btf); + cnt = btf__type_cnt(src_btf) - 1; /* pre-allocate 
enough memory for new types */ t = btf_add_type_mem(btf, data_sz); @@ -1968,7 +1976,7 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) static struct btf_type *btf_last_type(struct btf *btf) { - return btf_type_by_id(btf, btf__get_nr_types(btf)); + return btf_type_by_id(btf, btf__type_cnt(btf) - 1); } /* @@ -3174,7 +3182,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, goto done; } - type_cnt = btf__get_nr_types(btf) + 1; + type_cnt = btf__type_cnt(btf); d->map = malloc(sizeof(__u32) * type_cnt); if (!d->map) { err = -ENOMEM; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index c9364be420352..bc005ba3ceec3 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -132,7 +132,9 @@ LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1") LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); +LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); @@ -145,7 +147,9 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 3c19644b5fad2..8e05ab44c22ad 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -188,7 +188,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf, static int btf_dump_resize(struct btf_dump *d) { - int err, last_id = btf__get_nr_types(d->btf); + int err, last_id = btf__type_cnt(d->btf) - 1; if (last_id <= d->last_id) return 0; @@ -262,7 +262,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) { int err, i; - if (id > btf__get_nr_types(d->btf)) + if (id >= btf__type_cnt(d->btf)) return libbpf_err(-EINVAL); err = btf_dump_resize(d); @@ -294,11 +294,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) */ static int btf_dump_mark_referenced(struct btf_dump *d) { - int i, j, n = btf__get_nr_types(d->btf); + int i, j, n = btf__type_cnt(d->btf); const struct btf_type *t; __u16 vlen; - for (i = d->last_id + 1; i <= n; i++) { + for (i = d->last_id + 1; i < n; i++) { t = btf__type_by_id(d->btf, i); vlen = btf_vlen(t); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index db6e480148394..eb102440a28ab 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2497,8 +2497,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return -EINVAL; } - nr_types = btf__get_nr_types(obj->btf); - for (i = 1; i <= nr_types; i++) { + nr_types = btf__type_cnt(obj->btf); + for (i = 1; i < nr_types; i++) { t = btf__type_by_id(obj->btf, i); if 
(!btf_is_datasec(t)) continue; @@ -2579,7 +2579,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) struct btf_type *t; int i, j, vlen; - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { @@ -2763,9 +2763,9 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) { int err = 0; - __u32 i, n = btf__get_nr_types(btf); + __u32 i, n = btf__type_cnt(btf); - for (i = 1; i <= n; i++) { + for (i = 1; i < n; i++) { struct btf_type *t = btf_type_by_id(btf, i); /* Loader needs to fix up some of the things compiler @@ -2905,8 +2905,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) continue; - n = btf__get_nr_types(obj->btf); - for (j = 1; j <= n; j++) { + n = btf__type_cnt(obj->btf); + for (j = 1; j < n; j++) { t = btf_type_by_id(obj->btf, j); if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) continue; @@ -2926,7 +2926,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) __u32 sz; /* clone BTF to sanitize a copy and leave the original intact */ - raw_data = btf__get_raw_data(obj->btf, &sz); + raw_data = btf__raw_data(obj->btf, &sz); kern_btf = btf__new(raw_data, sz); err = libbpf_get_error(kern_btf); if (err) @@ -2939,7 +2939,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (obj->gen_loader) { __u32 raw_size = 0; - const void *raw_data = btf__get_raw_data(kern_btf, &raw_size); + const void *raw_data = btf__raw_data(kern_btf, &raw_size); if (!raw_data) return -ENOMEM; @@ -3350,8 +3350,8 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) if (!btf) return -ESRCH; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_var(t) && !btf_is_func(t)) @@ -3382,8 +3382,8 @@ static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { if (!btf) return -ESRCH; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_datasec(t)) @@ -3467,8 +3467,8 @@ static int find_int_btf_id(const struct btf *btf) const struct btf_type *t; int i, n; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (btf_is_int(t) && btf_int_bits(t) == 32) @@ -5076,8 +5076,8 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t targ_essent_len; int n, i; - n = btf__get_nr_types(targ_btf); - for (i = targ_start_id; i <= n; i++) { + n = btf__type_cnt(targ_btf); + for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); if (btf_kind(t) != btf_kind(local_cand->t)) continue; @@ -5252,7 +5252,7 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l err = bpf_core_add_cands(&local_cand, local_essent_len, obj->btf_modules[i].btf, obj->btf_modules[i].name, - btf__get_nr_types(obj->btf_vmlinux) + 1, + btf__type_cnt(obj->btf_vmlinux), cands); if (err) goto err_out; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e6fb1ba493692..116964a29e44c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -395,4 +395,6 @@ 
LIBBPF_0.6.0 { bpf_object__prev_program; btf__add_btf; btf__add_decl_tag; + btf__raw_data; + btf__type_cnt; } LIBBPF_0.5.0; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 13e6fdc7d8cb2..7bf658fbda800 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -920,7 +920,7 @@ static int check_btf_type_id(__u32 *type_id, void *ctx) { struct btf *btf = ctx; - if (*type_id > btf__get_nr_types(btf)) + if (*type_id >= btf__type_cnt(btf)) return -EINVAL; return 0; @@ -947,8 +947,8 @@ static int linker_sanity_check_btf(struct src_obj *obj) if (!obj->btf) return 0; - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { t = btf_type_by_id(obj->btf, i); err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); @@ -1658,8 +1658,8 @@ static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sy return -EINVAL; } - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(obj->btf, i); /* some global and extern FUNCs and VARs might not be associated with any @@ -2130,8 +2130,8 @@ static int linker_fixup_btf(struct src_obj *obj) if (!obj->btf) return 0; - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { struct btf_var_secinfo *vi; struct btf_type *t; @@ -2234,14 +2234,14 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (!obj->btf) return 0; - start_id = btf__get_nr_types(linker->btf) + 1; - n = btf__get_nr_types(obj->btf); + start_id = btf__type_cnt(linker->btf); + n = btf__type_cnt(obj->btf); obj->btf_type_map = calloc(n + 1, sizeof(int)); if (!obj->btf_type_map) return -ENOMEM; - for (i = 1; i <= n; i++) { + for (i = 1; i < n; i++) { struct glob_sym *glob_sym = NULL; t = btf__type_by_id(obj->btf, i); @@ -2296,8 +2296,8 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) } /* remap all the types except DATASECs */ - n = btf__get_nr_types(linker->btf); - for (i = start_id; i <= n; i++) { + n = btf__type_cnt(linker->btf); + for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) @@ -2656,7 +2656,7 @@ static int finalize_btf(struct bpf_linker *linker) __u32 raw_sz; /* bail out if no BTF data was produced */ - if (btf__get_nr_types(linker->btf) == 0) + if (btf__type_cnt(linker->btf) == 1) return 0; for (i = 1; i < linker->sec_cnt; i++) { @@ -2693,7 +2693,7 @@ static int finalize_btf(struct bpf_linker *linker) } /* Emit .BTF section */ - raw_data = btf__get_raw_data(linker->btf, &raw_sz); + raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) return -ENOMEM; From 2502e74bb5f9a49c5cd50dd365fc699a7d50daed Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Fri, 22 Oct 2021 21:06:20 +0800 Subject: [PATCH 101/181] perf bpf: Switch to new btf__raw_data API Replace the call to btf__get_raw_data with new API btf__raw_data. The old APIs will be deprecated in libbpf v0.7+. No functionality change. 
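(A sketch of the caller-side change, not taken from the diff: the new getter is a drop-in replacement for the old one.)

    __u32 size;
    const void *data;

    /* old name, to be marked deprecated in libbpf v0.7+ */
    data = btf__get_raw_data(btf, &size);

    /* new name, identical behavior */
    data = btf__raw_data(btf, &size);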
Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022130623.1548429-3-hengqi.chen@gmail.com --- tools/perf/util/bpf-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 1a7112a87736a..388847bab6d90 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -110,7 +110,7 @@ static int perf_env__fetch_btf(struct perf_env *env, u32 data_size; const void *data; - data = btf__get_raw_data(btf, &data_size); + data = btf__raw_data(btf, &data_size); node = malloc(data_size + sizeof(struct btf_node)); if (!node) From 2d8f09fafc6351d77f724c208c8168d2512d5478 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Fri, 22 Oct 2021 21:06:21 +0800 Subject: [PATCH 102/181] tools/resolve_btfids: Switch to new btf__type_cnt API Replace the call to btf__get_nr_types with new API btf__type_cnt. The old API will be deprecated in libbpf v0.7+. No functionality change. Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022130623.1548429-4-hengqi.chen@gmail.com --- tools/bpf/resolve_btfids/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 716e6ad1864b8..a59cb0ee609cd 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -502,12 +502,12 @@ static int symbols_resolve(struct object *obj) } err = -1; - nr_types = btf__get_nr_types(btf); + nr_types = btf__type_cnt(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr_types; type_id++) { + for (type_id = 1; type_id < nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; From 58fc155b0e4bbd69584b7a241ab01d55ee7cfde6 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Fri, 22 Oct 2021 21:06:22 +0800 Subject: [PATCH 103/181] bpftool: Switch to new btf__type_cnt API Replace the call to btf__get_nr_types with new API btf__type_cnt. The old API will be deprecated in libbpf v0.7+. No functionality change. 
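(The iteration idiom these conversions use, sketched: btf__type_cnt() also counts the implicit 'void' type 0, i.e. btf__type_cnt() == btf__get_nr_types() + 1, so the loop bound changes from "<=" to "<".)

    __u32 id, n = btf__type_cnt(btf);

    for (id = 1; id < n; id++) {    /* was: id <= btf__get_nr_types(btf) */
            const struct btf_type *t = btf__type_by_id(btf, id);
            /* ... inspect t ... */
    }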
Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022130623.1548429-5-hengqi.chen@gmail.com --- tools/bpf/bpftool/btf.c | 12 ++++++------ tools/bpf/bpftool/gen.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7b68d4f65fe62..0cd769adac662 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -329,7 +329,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf("\n\ttype_id=%u offset=%u size=%u", v->type, v->offset, v->size); - if (v->type <= btf__get_nr_types(btf)) { + if (v->type < btf__type_cnt(btf)) { vt = btf__type_by_id(btf, v->type); printf(" (%s '%s')", btf_kind_str[btf_kind_safe(btf_kind(vt))], @@ -390,14 +390,14 @@ static int dump_btf_raw(const struct btf *btf, } } else { const struct btf *base; - int cnt = btf__get_nr_types(btf); + int cnt = btf__type_cnt(btf); int start_id = 1; base = btf__base_btf(btf); if (base) - start_id = btf__get_nr_types(base) + 1; + start_id = btf__type_cnt(base); - for (i = start_id; i <= cnt; i++) { + for (i = start_id; i < cnt; i++) { t = btf__type_by_id(btf, i); dump_btf_type(btf, i, t); } @@ -440,9 +440,9 @@ static int dump_btf_c(const struct btf *btf, goto done; } } else { - int cnt = btf__get_nr_types(btf); + int cnt = btf__type_cnt(btf); - for (i = 1; i <= cnt; i++) { + for (i = 1; i < cnt; i++) { err = btf_dump__dump_type(d, i); if (err) goto done; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index c446405ab73fe..5c18351290f00 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -211,7 +211,7 @@ static int codegen_datasec_def(struct bpf_object *obj, static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) { struct btf *btf = bpf_object__btf(obj); - int n = btf__get_nr_types(btf); + int n = btf__type_cnt(btf); struct btf_dump *d; struct bpf_map *map; const struct btf_type *sec; @@ -233,7 +233,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) continue; sec = NULL; - for (i = 1; i <= n; i++) { + for (i = 1; i < n; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; From 487ef148cf17730442444c07a4c56f16578ec73e Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Fri, 22 Oct 2021 21:06:23 +0800 Subject: [PATCH 104/181] selftests/bpf: Switch to new btf__type_cnt/btf__raw_data APIs Replace the calls to btf__get_nr_types/btf__get_raw_data in selftests with new APIs btf__type_cnt/btf__raw_data. The old APIs will be deprecated in libbpf v0.7+. 
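(For split BTF, the new helper also yields the first type ID owned by the split object directly, as the bpftool change below does; a sketch, with dump_one_type() standing in for whatever per-type processing is wanted.)

    const struct btf *base = btf__base_btf(btf);
    int i, start_id = base ? btf__type_cnt(base) : 1;    /* was: btf__get_nr_types(base) + 1 */

    for (i = start_id; i < btf__type_cnt(btf); i++)
            dump_one_type(btf, i);    /* hypothetical helper */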
Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022130623.1548429-6-hengqi.chen@gmail.com --- tools/testing/selftests/bpf/btf_helpers.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/btf.c | 10 +++++----- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 8 ++++---- tools/testing/selftests/bpf/prog_tests/btf_endian.c | 12 ++++++------ tools/testing/selftests/bpf/prog_tests/btf_split.c | 2 +- .../testing/selftests/bpf/prog_tests/core_autosize.c | 2 +- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 +- .../selftests/bpf/prog_tests/resolve_btfids.c | 4 ++-- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index 668cfa20bb1b9..b5b6b013a245e 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -215,7 +215,7 @@ int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]) int i; bool ok = true; - ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types"); + ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types"); for (i = 1; i <= nr_types; i++) { if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump")) @@ -254,7 +254,7 @@ const char *btf_type_c_dump(const struct btf *btf) return NULL; } - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); if (err) { fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index fa67f25bbef52..557f948f99640 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -7274,8 +7274,8 @@ static void do_test_dedup(unsigned int test_num) goto done; } - test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); - expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); + test_btf_data = btf__raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size); if (CHECK(test_btf_size != expect_btf_size, "test_btf_size:%u != expect_btf_size:%u", test_btf_size, expect_btf_size)) { @@ -7329,8 +7329,8 @@ static void do_test_dedup(unsigned int test_num) expect_str_cur += expect_len + 1; } - test_nr_types = btf__get_nr_types(test_btf); - expect_nr_types = btf__get_nr_types(expect_btf); + test_nr_types = btf__type_cnt(test_btf); + expect_nr_types = btf__type_cnt(expect_btf); if (CHECK(test_nr_types != expect_nr_types, "test_nr_types:%u != expect_nr_types:%u", test_nr_types, expect_nr_types)) { @@ -7338,7 +7338,7 @@ static void do_test_dedup(unsigned int test_num) goto done; } - for (i = 1; i <= test_nr_types; i++) { + for (i = 1; i < test_nr_types; i++) { const struct btf_type *test_type, *expect_type; int test_size, expect_size; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 12f457b6786dd..3d837a7019fd4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -27,7 +27,7 @@ static struct btf_dump_test_case { static int btf_dump_all_types(const struct btf *btf, const struct btf_dump_opts *opts) { - size_t type_cnt = btf__get_nr_types(btf); + size_t type_cnt = btf__type_cnt(btf); struct btf_dump *d; int err = 0, id; @@ -36,7 +36,7 @@ static int btf_dump_all_types(const struct btf *btf, if (err) return err; - for (id = 1; 
id <= type_cnt; id++) { + for (id = 1; id < type_cnt; id++) { err = btf_dump__dump_type(d, id); if (err) goto done; @@ -171,7 +171,7 @@ void test_btf_dump_incremental(void) err = btf__add_field(btf, "x", 2, 0, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -210,7 +210,7 @@ void test_btf_dump_incremental(void) err = btf__add_field(btf, "s", 3, 32, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c index 8ab5d3e358dd2..2653cc482df46 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -32,7 +32,7 @@ void test_btf_endian() { ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); /* Get raw BTF data in non-native endianness... */ - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -42,9 +42,9 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); - swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -58,7 +58,7 @@ void test_btf_endian() { /* swap it back to native endianness */ btf__set_endianness(swap_btf, endian); - swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -75,7 +75,7 @@ void test_btf_endian() { swap_btf = NULL; btf__set_endianness(btf, swap_endian); - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -85,7 +85,7 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); /* the type should appear as if it was stored in native endianness */ t = btf__type_by_id(swap_btf, var_id); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index ca7c2a91610aa..b1ffe61f2aa9f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -72,7 +72,7 @@ void test_btf_split() { d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i <= btf__get_nr_types(btf2); i++) { + for (i = 1; i < btf__type_cnt(btf2); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 2a0dac6394ef4..1dfe14ff6aa45 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -112,7 +112,7 @@ 
void test_core_autosize(void) if (!ASSERT_OK_PTR(f, "btf_fdopen")) goto cleanup; - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data")) goto cleanup; written = fwrite(raw_data, 1, raw_sz, f); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index cc50f8feeca3d..55ec85ba73754 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -381,7 +381,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) exp->local_anon_void_ptr = -1; exp->local_anon_arr = -1; - for (i = 1; i <= btf__get_nr_types(local_btf); i++) + for (i = 1; i < btf__type_cnt(local_btf); i++) { t = btf__type_by_id(local_btf, i); /* we are interested only in anonymous types */ diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index f62361306f6d7..badda6309fd90 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -106,9 +106,9 @@ static int resolve_symbols(void) "Failed to load BTF from btf_data.o\n")) return -1; - nr = btf__get_nr_types(btf); + nr = btf__type_cnt(btf); - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id < nr; type_id++) { if (__resolve_symbol(btf, type_id)) break; } From e89ef634f81c9d90e1824ab183721f3b361472e6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 22 Oct 2021 10:47:43 +0100 Subject: [PATCH 105/181] bpftool: Avoid leaking the JSON writer prepared for program metadata Bpftool creates a new JSON object for writing program metadata in plain text mode, regardless of metadata being present or not. Then this writer is freed if any metadata has been found and printed, but it leaks otherwise. We cannot destroy the object unconditionally, because the destructor prints an undesirable line break. Instead, make sure the writer is created only after we have found program metadata to print. Found with valgrind. 
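(The shape of the fix, as a fragment-style sketch rather than the exact code in the diff below: create the writer only when the first metadata entry is about to be printed, so nothing leaks, and no stray line break is printed, when a program has no metadata.)

    json_writer_t *btf_wtr = NULL;    /* previously created unconditionally */

    /* ... while scanning the datasec for metadata variables ... */
    if (!printed_header) {
            printf("\tmetadata:");

            btf_wtr = jsonw_new(stdout);    /* lazily, on the first match */
            if (!btf_wtr)
                    goto out_free;
            printed_header = true;
    }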
Fixes: aff52e685eb3 ("bpftool: Support dumping metadata") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022094743.11052-1-quentin@isovalent.com --- tools/bpf/bpftool/prog.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 277d51c4c5d98..f633299b12618 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -307,18 +307,12 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (printed_header) jsonw_end_object(json_wtr); } else { - json_writer_t *btf_wtr = jsonw_new(stdout); + json_writer_t *btf_wtr; struct btf_dumper d = { .btf = btf, - .jw = btf_wtr, .is_plain_text = true, }; - if (!btf_wtr) { - p_err("jsonw alloc failed"); - goto out_free; - } - for (i = 0; i < vlen; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); name = btf__name_by_offset(btf, t_var->name_off); @@ -328,6 +322,14 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!printed_header) { printf("\tmetadata:"); + + btf_wtr = jsonw_new(stdout); + if (!btf_wtr) { + p_err("jsonw alloc failed"); + goto out_free; + } + d.jw = btf_wtr, + printed_header = true; } From a77f879ba1178437e5df87090165e5a45a91ba7f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 21 Oct 2021 14:48:12 -0700 Subject: [PATCH 106/181] libbpf: Use func name when pinning programs with LIBBPF_STRICT_SEC_NAME We can't use section name anymore because they are not unique and pinning objects with multiple programs with the same progtype/secname will fail. [0] Closes: https://github.com/libbpf/libbpf/issues/273 Fixes: 33a2c75c55e2 ("libbpf: add internal pin_name") Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20211021214814.1236114-2-sdf@google.com --- tools/lib/bpf/libbpf.c | 13 +++++++++++-- tools/lib/bpf/libbpf_legacy.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index eb102440a28ab..604abe00785f8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -285,7 +285,7 @@ struct bpf_program { size_t sub_insn_off; char *name; - /* sec_name with / replaced by _; makes recursive pinning + /* name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ char *pin_name; @@ -618,7 +618,16 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) { char *name, *p; - name = p = strdup(prog->sec_name); + if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) + name = strdup(prog->name); + else + name = strdup(prog->sec_name); + + if (!name) + return NULL; + + p = name; + while ((p = strchr(p, '/'))) *p = '_'; diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 74e6f860f703e..29ccafab11a8b 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -52,6 +52,9 @@ enum libbpf_strict_mode { * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become * unrecognized by libbpf and would have to be just SEC("xdp") and * SEC("xdp") and SEC("perf_event"). + * + * Note, in this mode the program pin path will be based on the + * function name instead of section name. 
*/ LIBBPF_STRICT_SEC_NAME = 0x04, From d1321207b176b0a5a8004b371ade5a0d8dec7732 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 21 Oct 2021 14:48:14 -0700 Subject: [PATCH 107/181] selftests/bpf: Fix flow dissector tests - update custom loader to search by name, not section name - update bpftool commands to use proper pin path Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211021214814.1236114-4-sdf@google.com --- .../selftests/bpf/flow_dissector_load.c | 18 +++++++++++------- .../selftests/bpf/flow_dissector_load.h | 10 ++-------- .../selftests/bpf/test_flow_dissector.sh | 10 +++++----- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 3fd83b9dc1bfe..87fd1aa323a92 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -17,7 +17,7 @@ const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; const char *cfg_map_name = "jmp_table"; bool cfg_attach = true; -char *cfg_section_name; +char *cfg_prog_name; char *cfg_path_name; static void load_and_attach_program(void) @@ -25,7 +25,11 @@ static void load_and_attach_program(void) int prog_fd, ret; struct bpf_object *obj; - ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (ret) + error(1, 0, "failed to enable libbpf strict mode: %d", ret); + + ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); @@ -75,15 +79,15 @@ static void parse_opts(int argc, char **argv) break; case 'p': if (cfg_path_name) - error(1, 0, "only one prog name can be given"); + error(1, 0, "only one path can be given"); cfg_path_name = optarg; break; case 's': - if (cfg_section_name) - error(1, 0, "only one section can be given"); + if (cfg_prog_name) + error(1, 0, "only one prog can be given"); - cfg_section_name = optarg; + cfg_prog_name = optarg; break; } } @@ -94,7 +98,7 @@ static void parse_opts(int argc, char **argv) if (cfg_attach && !cfg_path_name) error(1, 0, "must provide a path to the BPF program"); - if (cfg_attach && !cfg_section_name) + if (cfg_attach && !cfg_prog_name) error(1, 0, "must provide a section name"); } diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index 7290401ec1727..9d0acc2fc6cc0 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -7,7 +7,7 @@ static inline int bpf_flow_load(struct bpf_object **obj, const char *path, - const char *section_name, + const char *prog_name, const char *map_name, const char *keys_map_name, int *prog_fd, @@ -23,13 +23,7 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (ret) return ret; - main_prog = NULL; - bpf_object__for_each_program(prog, *obj) { - if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { - main_prog = prog; - break; - } - } + main_prog = bpf_object__find_program_by_name(*obj, prog_name); if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 174b72a64a4c5..dbd91221727d8 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -26,22 +26,22 @@ if [[ -z $(ip netns identify $$) ]]; 
then type flow_dissector if ! unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Unexpected unsuccessful attach in namespace" >&2 err=1 fi - $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \ flow_dissector if unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Unexpected successful attach in namespace" >&2 err=1 fi if ! $bpftool prog detach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Failed to detach flow dissector" >&2 err=1 fi @@ -95,7 +95,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s flow_dissector +./flow_dissector_load -p bpf_flow.o -s _dissect # Setup tc qdisc add dev lo ingress From bd16dee66ae4de3f1726c69ac901d2b7a53b0c86 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Oct 2021 12:56:28 -0700 Subject: [PATCH 108/181] bpf: Add BTF_KIND_DECL_TAG typedef support The llvm patches ([1], [2]) added support to attach btf_decl_tag attributes to typedef declarations. This patch added support in kernel. [1] https://reviews.llvm.org/D110127 [2] https://reviews.llvm.org/D112259 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211021195628.4018847-1-yhs@fb.com --- kernel/bpf/btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 9059053088b90..dbc3ad07e21b6 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -468,7 +468,7 @@ static bool btf_type_is_decl_tag(const struct btf_type *t) static bool btf_type_is_decl_tag_target(const struct btf_type *t) { return btf_type_is_func(t) || btf_type_is_struct(t) || - btf_type_is_var(t); + btf_type_is_var(t) || btf_type_is_typedef(t); } u32 btf_nr_types(const struct btf *btf) @@ -3885,7 +3885,7 @@ static int btf_decl_tag_resolve(struct btf_verifier_env *env, component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx != -1) { - if (btf_type_is_var(next_type)) { + if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) { btf_verifier_log_type(env, v->t, "Invalid component_idx"); return -EINVAL; } From 9d19a12b02bf009fefc3620234b4297e4bd7c5d5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Oct 2021 12:56:33 -0700 Subject: [PATCH 109/181] selftests/bpf: Add BTF_KIND_DECL_TAG typedef unit tests Test good and bad variants of typedef BTF_KIND_DECL_TAG encoding. 
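(For reference, the C source shape that produces such BTF; a sketch that assumes a clang new enough to support the btf_decl_tag attribute, per the llvm patches referenced above.)

    #define __tag1 __attribute__((btf_decl_tag("tag1")))

    typedef struct {
            int a;
            int b;
    } value_t __tag1;    /* clang emits a BTF_KIND_DECL_TAG referencing the TYPEDEF */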
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211021195633.4019472-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 557f948f99640..8257ee0af14d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3903,6 +3903,42 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Invalid component_idx", }, +{ + .descr = "decl_tag test #13, typedef, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #14, typedef, invalid component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, }; /* struct btf_raw_test raw_tests[] */ From 557c8c480401457d885bf7a82221dcc877692aa7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Oct 2021 12:56:38 -0700 Subject: [PATCH 110/181] selftests/bpf: Test deduplication for BTF_KIND_DECL_TAG typedef Add unit tests for deduplication of BTF_KIND_DECL_TAG to typedef types. Also changed a few comments from "tag" to "decl_tag" to match BTF_KIND_DECL_TAG enum value name. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211021195638.4019770-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 47 +++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8257ee0af14d0..ac596cb06e405 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -6877,11 +6877,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), }, .expect = { .raw_types = { @@ -6905,11 +6906,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), }, .opts = { .dont_resolve_fwds = false, @@ -7204,6 +7206,39 @@ const struct btf_dedup_test dedup_tests[] = { .dont_resolve_fwds = false, }, }, +{ + .descr = "dedup: typedef tags", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [3] */ + /* tag -> t: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [5] */ + /* tag -> t: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, }; From 8c18ea2d2c2913c80d8700c8bc8fe8568b8650a1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Oct 2021 12:56:43 -0700 Subject: [PATCH 111/181] selftests/bpf: Add BTF_KIND_DECL_TAG typedef example in tag.c Change value type in progs/tag.c to a typedef with a btf_decl_tag. With `bpftool btf dump file tag.o`, we have ... [14] TYPEDEF 'value_t' type_id=17 [15] DECL_TAG 'tag1' type_id=14 component_idx=-1 [16] DECL_TAG 'tag2' type_id=14 component_idx=-1 [17] STRUCT '(anon)' size=8 vlen=2 'a' type_id=2 bits_offset=0 'b' type_id=2 bits_offset=32 ... 
The btf_tag selftest also succeeded: $ ./test_progs -t tag #21 btf_tag:OK Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211021195643.4020315-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/tag.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/tag.c index 672d19e7b120b..1792f4eda0959 100644 --- a/tools/testing/selftests/bpf/progs/tag.c +++ b/tools/testing/selftests/bpf/progs/tag.c @@ -24,18 +24,23 @@ struct key_t { int c; } __tag1 __tag2; +typedef struct { + int a; + int b; +} value_t __tag1 __tag2; + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 3); __type(key, struct key_t); - __type(value, __u64); + __type(value, value_t); } hashmap1 SEC(".maps"); static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2 { struct key_t key; - __u64 val = 1; + value_t val = {}; key.a = key.b = key.c = x; bpf_map_update_elem(&hashmap1, &key, &val, 0); From 5a8671349dd1cfe6255c524d37d6265df7483f36 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Oct 2021 12:56:49 -0700 Subject: [PATCH 112/181] docs/bpf: Update documentation for BTF_KIND_DECL_TAG typedef support Add BTF_KIND_DECL_TAG typedef support in btf.rst. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211021195649.4020514-1-yhs@fb.com --- Documentation/bpf/btf.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 9e5b4a98af76b..9ad4218a751f5 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -474,7 +474,7 @@ No additional type data follow ``btf_type``. * ``info.kind_flag``: 0 * ``info.kind``: BTF_KIND_DECL_TAG * ``info.vlen``: 0 - * ``type``: ``struct``, ``union``, ``func`` or ``var`` + * ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef`` ``btf_type`` is followed by ``struct btf_decl_tag``.:: @@ -483,8 +483,8 @@ No additional type data follow ``btf_type``. }; The ``name_off`` encodes btf_decl_tag attribute string. -The ``type`` should be ``struct``, ``union``, ``func`` or ``var``. -For ``var`` type, ``btf_decl_tag.component_idx`` must be ``-1``. +The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``. +For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``. For the other three types, if the btf_decl_tag attribute is applied to the ``struct``, ``union`` or ``func`` itself, ``btf_decl_tag.component_idx`` must be ``-1``. Otherwise, From 5245dafe3d49efba4d3285cf27ee1cc1eeafafc6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 17:31:56 -0700 Subject: [PATCH 113/181] libbpf: Fix overflow in BTF sanity checks btf_header's str_off+str_len or type_off+type_len can overflow as they are u32s. This will lead to bypassing the sanity checks during BTF parsing, resulting in crashes afterwards. Fix by using 64-bit signed integers for comparison. 
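(A small worked example of the overflow, with made-up values: both header fields are u32, so their sum can wrap and slip past the size check; widening one operand to a 64-bit signed type preserves the true sum.)

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t meta_left = 0x100;
            uint32_t str_off = 0xffffff00, str_len = 0x200;

            /* 32-bit addition wraps to 0x100, so the old check never triggered */
            assert(!(meta_left < str_off + str_len));

            /* widened addition yields 0x100000100, so the new check rejects it */
            assert(meta_left < (long long)str_off + str_len);
            return 0;
    }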
Fixes: d8123624506c ("libbpf: Fix BTF data layout checks and allow empty BTF") Reported-by: Evgeny Vereshchagin Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211023003157.726961-1-andrii@kernel.org --- tools/lib/bpf/btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 66997d985ff8e..05b9452088154 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -241,12 +241,12 @@ static int btf_parse_hdr(struct btf *btf) } meta_left = btf->raw_size - sizeof(*hdr); - if (meta_left < hdr->str_off + hdr->str_len) { + if (meta_left < (long long)hdr->str_off + hdr->str_len) { pr_debug("Invalid BTF total size:%u\n", btf->raw_size); return -EINVAL; } - if (hdr->type_off + hdr->type_len > hdr->str_off) { + if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; From c825f5fee19caf301d9821cd79abaa734322de26 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 17:31:57 -0700 Subject: [PATCH 114/181] libbpf: Fix BTF header parsing checks Original code assumed fixed and correct BTF header length. That's not always the case, though, so fix this bug with a proper additional check. And use actual header length instead of sizeof(struct btf_header) in sanity checks. Fixes: 8a138aed4a80 ("bpf: btf: Add BTF support to libbpf") Reported-by: Evgeny Vereshchagin Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211023003157.726961-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 05b9452088154..ef924fc2c9114 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -236,13 +236,19 @@ static int btf_parse_hdr(struct btf *btf) } btf_bswap_hdr(hdr); } else if (hdr->magic != BTF_MAGIC) { - pr_debug("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic: %x\n", hdr->magic); return -EINVAL; } - meta_left = btf->raw_size - sizeof(*hdr); + if (btf->raw_size < hdr->hdr_len) { + pr_debug("BTF header len %u larger than data size %u\n", + hdr->hdr_len, btf->raw_size); + return -EINVAL; + } + + meta_left = btf->raw_size - hdr->hdr_len; if (meta_left < (long long)hdr->str_off + hdr->str_len) { - pr_debug("Invalid BTF total size:%u\n", btf->raw_size); + pr_debug("Invalid BTF total size: %u\n", btf->raw_size); return -EINVAL; } From 6972dc3b8778ce0d9ce819c6f1e3d32ce2bc3dd9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 15:32:25 -0700 Subject: [PATCH 115/181] selftests/bpf: Normalize selftest entry points Ensure that all test entry points are global void functions with no input arguments. Mark few subtest entry points as static. 
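(The convention being enforced, sketched with hypothetical test names: only global 'void test_*(void)' functions are meant to be discovered as entry points, while helpers and subtests stay static.)

    /* discovered by test_progs: global, returns void, takes no arguments */
    void test_my_feature(void)
    {
            /* ... ASSERT_*() checks ... */
    }

    /* helper/subtest: static, so it is not treated as an entry point */
    static void subtest_corner_case(void)
    {
            /* ... */
    }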
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211022223228.99920-2-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2 +- .../testing/selftests/bpf/prog_tests/resolve_btfids.c | 10 ++++------ .../testing/selftests/bpf/prog_tests/signal_pending.c | 2 +- tools/testing/selftests/bpf/prog_tests/snprintf.c | 4 ++-- .../testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 6 +++--- .../selftests/bpf/prog_tests/xdp_devmap_attach.c | 4 ++-- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 3d837a7019fd4..aa76360d8f499 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -133,7 +133,7 @@ static char *dump_buf; static size_t dump_buf_sz; static FILE *dump_buf_file; -void test_btf_dump_incremental(void) +static void test_btf_dump_incremental(void) { struct btf *btf = NULL; struct btf_dump *d = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index badda6309fd90..f4a13d9dd5c8b 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -117,14 +117,14 @@ static int resolve_symbols(void) return 0; } -int test_resolve_btfids(void) +void test_resolve_btfids(void) { __u32 *test_list, *test_lists[] = { test_list_local, test_list_global }; unsigned int i, j; int ret = 0; if (resolve_symbols()) - return -1; + return; /* Check BTF_ID_LIST(test_list_local) and * BTF_ID_LIST_GLOBAL(test_list_global) IDs @@ -138,7 +138,7 @@ int test_resolve_btfids(void) test_symbols[i].name, test_list[i], test_symbols[i].id); if (ret) - return ret; + return; } } @@ -161,9 +161,7 @@ int test_resolve_btfids(void) if (i > 0) { if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check")) - return -1; + return; } } - - return ret; } diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index dfcbddcbe4d3f..fdfdcff6cbef0 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -42,7 +42,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) signal(SIGALRM, SIG_DFL); } -void test_signal_pending(enum bpf_prog_type prog_type) +void test_signal_pending(void) { test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER); test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR); diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 8fd1b4b29a0e0..394ebfc3bbf31 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -33,7 +33,7 @@ #define EXP_NO_BUF_RET 29 -void test_snprintf_positive(void) +static void test_snprintf_positive(void) { char exp_addr_out[] = EXP_ADDR_OUT; char exp_sym_out[] = EXP_SYM_OUT; @@ -103,7 +103,7 @@ static int load_single_snprintf(char *fmt) return ret; } -void test_snprintf_negative(void) +static void test_snprintf_negative(void) { ASSERT_OK(load_single_snprintf("valid %d"), "valid usage"); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index d5c98f2cb12fb..f529e3c923ae7 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -2,7 +2,7 @@ #include #include -void test_xdp_adjust_tail_shrink(void) +static void test_xdp_adjust_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; __u32 duration, retval, size, expect_sz; @@ -30,7 +30,7 @@ void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; struct bpf_object *obj; @@ -58,7 +58,7 @@ void test_xdp_adjust_tail_grow(void) bpf_object__close(obj); } -void test_xdp_adjust_tail_grow2(void) +static void test_xdp_adjust_tail_grow2(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; char buf[4096]; /* avoid segfault: large buf to hold grow results */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index d4e9a9972a67e..3079d5568f8fa 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -8,7 +8,7 @@ #define IFINDEX_LO 1 -void test_xdp_with_devmap_helpers(void) +static void test_xdp_with_devmap_helpers(void) { struct test_xdp_with_devmap_helpers *skel; struct bpf_prog_info info = {}; @@ -60,7 +60,7 @@ void test_xdp_with_devmap_helpers(void) test_xdp_with_devmap_helpers__destroy(skel); } -void test_neg_xdp_devmap_helpers(void) +static void test_neg_xdp_devmap_helpers(void) { struct test_xdp_devmap_helpers *skel; From 8ea688e7f444f1830aa4b283e1364ab9d9bf42b5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 15:32:26 -0700 Subject: [PATCH 116/181] selftests/bpf: Support multiple tests per file Revamp how test discovery works for test_progs and allow multiple test entries per file. Any global void function with no arguments and serial_test_ or test_ prefix is considered a test. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211022223228.99920-3-andrii@kernel.org --- tools/testing/selftests/bpf/Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 498222543c374..ac47cf9760fc5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -421,10 +421,9 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) - $$(shell ( cd $(TRUNNER_TESTS_DIR); \ - echo '/* Generated header, do not edit */'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ + $$(shell (echo '/* Generated header, do not edit */'; \ + sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p' \ + $(TRUNNER_TESTS_DIR)/*.c | sort ; \ ) > $$@) endif From 2c0f51ac320649402a80a6228744eb54f42a5c21 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 15:32:27 -0700 Subject: [PATCH 117/181] selftests/bpf: Mark tc_redirect selftest as serial It seems to cause a lot of harm to kprobe/tracepoint selftests. Yucong mentioned before that it does manipulate sysfs, which might be the reason. So let's mark it as serial, though ideally it would be less intrusive on the system at test. 
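A rough sketch of what the serial_ prefix means in practice (hypothetical test names; it assumes, per the parallel-mode work in this series, that test_progs runs serial_test_* entry points one at a time rather than concurrently with other tests):

#include <test_progs.h>

/* eligible for concurrent execution under test_progs -j */
void test_plain_feature(void)
{
	ASSERT_OK(0, "todo");	/* real checks would go here */
}

/* the serial_ prefix asks the runner to execute this test alone,
 * which is the right choice when the test touches global state
 * (sysfs knobs, shared network setup, ...)
 */
void serial_test_global_state(void)
{
	ASSERT_OK(0, "todo");
}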
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211022223228.99920-4-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index e87bc4466d9ac..53672634bc524 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -769,7 +769,7 @@ static void *test_tc_redirect_run_tests(void *arg) return NULL; } -void test_tc_redirect(void) +void serial_test_tc_redirect(void) { pthread_t test_thread; int err; From 3762a39ce85feb07996f3f0390963da71874c651 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 22 Oct 2021 15:32:28 -0700 Subject: [PATCH 118/181] selftests/bpf: Split out bpf_verif_scale selftests into multiple tests Instead of using subtests in bpf_verif_scale selftest, turn each scale sub-test into its own test. Each subtest is compltely independent and just reuses a bit of common test running logic, so the conversion is trivial. For convenience, keep all of BPF verifier scale tests in one file. This conversion shaves off a significant amount of time when running test_progs in parallel mode. E.g., just running scale tests (-t verif_scale): BEFORE ====== Summary: 24/0 PASSED, 0 SKIPPED, 0 FAILED real 0m22.894s user 0m0.012s sys 0m22.797s AFTER ===== Summary: 24/0 PASSED, 0 SKIPPED, 0 FAILED real 0m12.044s user 0m0.024s sys 0m27.869s Ten second saving right there. test_progs -j is not yet ready to be turned on by default, unfortunately, and some tests fail almost every time, but this is a good improvement nevertheless. Ignoring few failures, here is sequential vs parallel run times when running all tests now: SEQUENTIAL ========== Summary: 206/953 PASSED, 4 SKIPPED, 0 FAILED real 1m5.625s user 0m4.211s sys 0m31.650s PARALLEL ======== Summary: 204/952 PASSED, 4 SKIPPED, 2 FAILED real 0m35.550s user 0m4.998s sys 0m39.890s Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211022223228.99920-5-andrii@kernel.org --- .../bpf/prog_tests/bpf_verif_scale.c | 220 ++++++++++++------ 1 file changed, 152 insertions(+), 68 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 3d002c245d2b7..867349e4ed9e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -39,82 +39,166 @@ struct scale_test_def { bool fails; }; -void test_bpf_verif_scale(void) -{ - struct scale_test_def tests[] = { - { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ }, - - { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS }, - { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, - { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, - - { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* full unroll by llvm */ - { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* partial unroll. llvm will unroll loop ~150 times. - * C loop count -> 600. - * Asm loop count -> 4. - * 16k insns in loop body. - * Total of 5 such loops. Total program size ~82k insns. - */ - { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* no unroll at all. 
- * C loop count -> 600. - * ASM loop count -> 600. - * ~110 insns in loop body. - * Total of 5 such loops. Total program size ~1500 insns. - */ - { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, - { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, - { "loop6.o", BPF_PROG_TYPE_KPROBE }, - - /* partial unroll. 19k insn in a loop. - * Total program size 20.8k insn. - * ~350k processed_insns - */ - { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* no unroll, tiny loops */ - { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* non-inlined subprogs */ - { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - - { "test_xdp_loop.o", BPF_PROG_TYPE_XDP }, - { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL }, - }; +static void scale_test(const char *file, + enum bpf_prog_type attach_type, + bool should_fail) +{ libbpf_print_fn_t old_print_fn = NULL; - int err, i; + int err; if (env.verifier_stats) { test__force_log(); old_print_fn = libbpf_set_print(libbpf_debug_print); } - for (i = 0; i < ARRAY_SIZE(tests); i++) { - const struct scale_test_def *test = &tests[i]; - - if (!test__start_subtest(test->file)) - continue; - - err = check_load(test->file, test->attach_type); - CHECK_FAIL(err && !test->fails); - } + err = check_load(file, attach_type); + if (should_fail) + ASSERT_ERR(err, "expect_error"); + else + ASSERT_OK(err, "expect_success"); if (env.verifier_stats) libbpf_set_print(old_print_fn); } + +void test_verif_scale1() +{ + scale_test("test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale2() +{ + scale_test("test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale3() +{ + scale_test("test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_pyperf_global() +{ + scale_test("pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf_subprogs() +{ + scale_test("pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf50() +{ + /* full unroll by llvm */ + scale_test("pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf100() +{ + /* full unroll by llvm */ + scale_test("pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf180() +{ + /* full unroll by llvm */ + scale_test("pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf600() +{ + /* partial unroll. llvm will unroll loop ~150 times. + * C loop count -> 600. + * Asm loop count -> 4. + * 16k insns in loop body. + * Total of 5 such loops. Total program size ~82k insns. + */ + scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf600_nounroll() +{ + /* no unroll at all. + * C loop count -> 600. + * ASM loop count -> 600. + * ~110 insns in loop body. + * Total of 5 such loops. Total program size ~1500 insns. 
+ */ + scale_test("pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop1() +{ + scale_test("loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop2() +{ + scale_test("loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop3_fail() +{ + scale_test("loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */); +} + +void test_verif_scale_loop4() +{ + scale_test("loop4.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_loop5() +{ + scale_test("loop5.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_loop6() +{ + scale_test("loop6.o", BPF_PROG_TYPE_KPROBE, false); +} + +void test_verif_scale_strobemeta() +{ + /* partial unroll. 19k insn in a loop. + * Total program size 20.8k insn. + * ~350k processed_insns + */ + scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_nounroll1() +{ + /* no unroll, tiny loops */ + scale_test("strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_nounroll2() +{ + /* no unroll, tiny loops */ + scale_test("strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_subprogs() +{ + /* non-inlined subprogs */ + scale_test("strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_sysctl_loop1() +{ + scale_test("test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); +} + +void test_verif_scale_sysctl_loop2() +{ + scale_test("test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); +} + +void test_verif_scale_xdp_loop() +{ + scale_test("test_xdp_loop.o", BPF_PROG_TYPE_XDP, false); +} + +void test_verif_scale_seg6_loop() +{ + scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false); +} From 8b6c46241c774c83998092a4eafe40f054568881 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 23 Oct 2021 21:51:50 +0100 Subject: [PATCH 119/181] bpftool: Remove Makefile dep. on $(LIBBPF) for $(LIBBPF_INTERNAL_HDRS) The dependency is only useful to make sure that the $(LIBBPF_HDRS_DIR) directory is created before we try to install locally the required libbpf internal header. Let's create this directory properly instead. This is in preparation of making $(LIBBPF_INTERNAL_HDRS) a dependency to the bootstrap bpftool version, in which case we want no dependency on $(LIBBPF). Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211023205154.6710-2-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 098d762e111a7..939b0fca5fb96 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -39,14 +39,14 @@ ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. 
kernelversion) endif -$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR): $(QUIET_MKDIR)mkdir -p $@ $(LIBBPF): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers -$(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h $(LIBBPF) +$(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_DIR) $(call QUIET_INSTALL, $@) $(Q)install -m 644 -t $(LIBBPF_HDRS_DIR) $< From 46241271d18f3ae095b7ec3d9d136d8f4e28e025 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 23 Oct 2021 21:51:51 +0100 Subject: [PATCH 120/181] bpftool: Do not expose and init hash maps for pinned path in main.c BPF programs, maps, and links, can all be listed with their pinned paths by bpftool, when the "-f" option is provided. To do so, bpftool builds hash maps containing all pinned paths for each kind of objects. These three hash maps are always initialised in main.c, and exposed through main.h. There appear to be no particular reason to do so: we can just as well make them static to the files that need them (prog.c, map.c, and link.c respectively), and initialise them only when we want to show objects and the "-f" switch is provided. This may prevent unnecessary memory allocations if the implementation of the hash maps was to change in the future. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211023205154.6710-3-quentin@isovalent.com --- tools/bpf/bpftool/link.c | 9 ++++++++- tools/bpf/bpftool/main.c | 12 ------------ tools/bpf/bpftool/main.h | 3 --- tools/bpf/bpftool/map.c | 9 ++++++++- tools/bpf/bpftool/prog.c | 9 ++++++++- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 8cc3e36f8cc69..a5effb1816b78 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -20,6 +20,8 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETNS] = "netns", }; +static struct pinned_obj_table link_table; + static int link_parse_fd(int *argc, char ***argv) { int fd; @@ -302,8 +304,10 @@ static int do_show(int argc, char **argv) __u32 id = 0; int err, fd; - if (show_pinned) + if (show_pinned) { + hash_init(link_table.table); build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + } build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { @@ -346,6 +350,9 @@ static int do_show(int argc, char **argv) delete_obj_refs_table(&refs_table); + if (show_pinned) + delete_pinned_obj_table(&link_table); + return errno == ENOENT ? 
0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 02eaaf065f651..7a33f0e6da281 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,9 +31,6 @@ bool verifier_logs; bool relaxed_maps; bool use_loader; struct btf *base_btf; -struct pinned_obj_table prog_table; -struct pinned_obj_table map_table; -struct pinned_obj_table link_table; struct obj_refs_table refs_table; static void __noreturn clean_and_exit(int i) @@ -409,10 +406,6 @@ int main(int argc, char **argv) block_mount = false; bin_name = argv[0]; - hash_init(prog_table.table); - hash_init(map_table.table); - hash_init(link_table.table); - opterr = 0; while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", options, NULL)) >= 0) { @@ -479,11 +472,6 @@ int main(int argc, char **argv) if (json_output) jsonw_destroy(&json_wtr); - if (show_pinned) { - delete_pinned_obj_table(&prog_table); - delete_pinned_obj_table(&map_table); - delete_pinned_obj_table(&link_table); - } btf__free(base_btf); return ret; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 90caa42aac4cf..baf607cd59241 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -91,9 +91,6 @@ extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; extern struct btf *base_btf; -extern struct pinned_obj_table prog_table; -extern struct pinned_obj_table map_table; -extern struct pinned_obj_table link_table; extern struct obj_refs_table refs_table; void __printf(1, 2) p_err(const char *fmt, ...); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 407071d54ab1c..0085039d96103 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -56,6 +56,8 @@ const char * const map_type_name[] = { const size_t map_type_name_size = ARRAY_SIZE(map_type_name); +static struct pinned_obj_table map_table; + static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@ -694,8 +696,10 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) + if (show_pinned) { + hash_init(map_table.table); build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + } build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) @@ -742,6 +746,9 @@ static int do_show(int argc, char **argv) delete_obj_refs_table(&refs_table); + if (show_pinned) + delete_pinned_obj_table(&map_table); + return errno == ENOENT ? 
0 : -1; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f633299b12618..48c2fa4d068e5 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -84,6 +84,8 @@ static const char * const attach_type_strings[] = { [__MAX_BPF_ATTACH_TYPE] = NULL, }; +static struct pinned_obj_table prog_table; + static enum bpf_attach_type parse_attach_type(const char *str) { enum bpf_attach_type type; @@ -567,8 +569,10 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) + if (show_pinned) { + hash_init(prog_table.table); build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + } build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) @@ -613,6 +617,9 @@ static int do_show(int argc, char **argv) delete_obj_refs_table(&refs_table); + if (show_pinned) + delete_pinned_obj_table(&prog_table); + return err; } From 8f184732b60b74a8f8ba0d9a5c248bf611b1ebba Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 23 Oct 2021 21:51:52 +0100 Subject: [PATCH 121/181] bpftool: Switch to libbpf's hashmap for pinned paths of BPF objects In order to show pinned paths for BPF programs, maps, or links when listing them with the "-f" option, bpftool creates hash maps to store all relevant paths under the bpffs. So far, it would rely on the kernel implementation (from tools/include/linux/hashtable.h). We can make bpftool rely on libbpf's implementation instead. The motivation is to make bpftool less dependent of kernel headers, to ease the path to a potential out-of-tree mirror, like libbpf has. This commit is the first step of the conversion: the hash maps for pinned paths for programs, maps, and links are converted to libbpf's hashmap.{c,h}. Other hash maps used for the PIDs of process holding references to BPF objects are left unchanged for now. On the build side, this requires adding a dependency to a second header internal to libbpf, and making it a dependency for the bootstrap bpftool version as well. The rest of the changes are a rather straightforward conversion. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211023205154.6710-4-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 8 +++--- tools/bpf/bpftool/common.c | 50 +++++++++++++++++++++++--------------- tools/bpf/bpftool/link.c | 36 +++++++++++++++------------ tools/bpf/bpftool/main.h | 29 +++++++++++++--------- tools/bpf/bpftool/map.c | 36 +++++++++++++++------------ tools/bpf/bpftool/prog.c | 36 +++++++++++++++------------ 6 files changed, 111 insertions(+), 84 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 939b0fca5fb96..c0c30e56988f2 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -31,9 +31,9 @@ LIBBPF = $(LIBBPF_OUTPUT)libbpf.a LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a -# We need to copy nlattr.h which is not otherwise exported by libbpf, but still -# required by bpftool. -LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,nlattr.h) +# We need to copy hashmap.h and nlattr.h which is not otherwise exported by +# libbpf, but still required by bpftool. +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. 
kernelversion) @@ -209,7 +209,7 @@ $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) -$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT) +$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)%.o: %.c diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index d42d930a3ec4d..511eccdbdfe6d 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -22,6 +22,7 @@ #include #include +#include #include /* libbpf_num_possible_cpus */ #include "main.h" @@ -393,7 +394,7 @@ void print_hex_data_json(uint8_t *data, size_t len) } /* extra params for nftw cb */ -static struct pinned_obj_table *build_fn_table; +static struct hashmap *build_fn_table; static enum bpf_obj_type build_fn_type; static int do_build_table_cb(const char *fpath, const struct stat *sb, @@ -401,9 +402,9 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, { struct bpf_prog_info pinned_info; __u32 len = sizeof(pinned_info); - struct pinned_obj *obj_node; enum bpf_obj_type objtype; int fd, err = 0; + char *path; if (typeflag != FTW_F) goto out_ret; @@ -420,28 +421,26 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) goto out_close; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { + path = strdup(fpath); + if (!path) { err = -1; goto out_close; } - obj_node->id = pinned_info.id; - obj_node->path = strdup(fpath); - if (!obj_node->path) { - err = -1; - free(obj_node); + err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path); + if (err) { + p_err("failed to append entry to hashmap for ID %u, path '%s': %s", + pinned_info.id, path, strerror(errno)); goto out_close; } - hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); out_close: close(fd); out_ret: return err; } -int build_pinned_obj_table(struct pinned_obj_table *tab, +int build_pinned_obj_table(struct hashmap *tab, enum bpf_obj_type type) { struct mntent *mntent = NULL; @@ -470,17 +469,18 @@ int build_pinned_obj_table(struct pinned_obj_table *tab, return err; } -void delete_pinned_obj_table(struct pinned_obj_table *tab) +void delete_pinned_obj_table(struct hashmap *map) { - struct pinned_obj *obj; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - hash_del(&obj->hash); - free(obj->path); - free(obj); - } + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) + free(entry->value); + + hashmap__free(map); } unsigned int get_page_size(void) @@ -962,3 +962,13 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } + +size_t hash_fn_for_key_as_id(const void *key, void *ctx) +{ + return (size_t)key; +} + +bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index a5effb1816b78..404cc5459c6bc 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -7,6 +7,7 @@ #include #include +#include #include "json_writer.h" #include "main.h" @@ -20,7 +21,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETNS] = "netns", }; -static struct pinned_obj_table link_table; +static struct hashmap *link_table; static int link_parse_fd(int *argc, char ***argv) { @@ 
-158,15 +159,14 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) break; } - if (!hash_empty(link_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(link_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(link_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(link_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } @@ -246,13 +246,12 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) break; } - if (!hash_empty(link_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(link_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(link_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(link_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); @@ -305,8 +304,13 @@ static int do_show(int argc, char **argv) int err, fd; if (show_pinned) { - hash_init(link_table.table); - build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + link_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!link_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(link_table, BPF_OBJ_LINK); } build_obj_refs_table(&refs_table, BPF_OBJ_LINK); @@ -351,7 +355,7 @@ static int do_show(int argc, char **argv) delete_obj_refs_table(&refs_table); if (show_pinned) - delete_pinned_obj_table(&link_table); + delete_pinned_obj_table(link_table); return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index baf607cd59241..0d64e39573f22 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -14,6 +14,7 @@ #include #include +#include #include #include "json_writer.h" @@ -105,16 +106,6 @@ void set_max_rlimit(void); int mount_tracefs(const char *target); -struct pinned_obj_table { - DECLARE_HASHTABLE(table, 16); -}; - -struct pinned_obj { - __u32 id; - char *path; - struct hlist_node hash; -}; - struct obj_refs_table { DECLARE_HASHTABLE(table, 16); }; @@ -134,9 +125,9 @@ struct obj_refs { struct btf; struct bpf_line_info; -int build_pinned_obj_table(struct pinned_obj_table *table, +int build_pinned_obj_table(struct hashmap *table, enum bpf_obj_type type); -void delete_pinned_obj_table(struct pinned_obj_table *tab); +void delete_pinned_obj_table(struct hashmap *table); __weak int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type); __weak void delete_obj_refs_table(struct obj_refs_table *table); @@ -256,4 +247,18 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, int print_all_levels(__maybe_unused enum libbpf_print_level level, const char *format, va_list args); + +size_t hash_fn_for_key_as_id(const void *key, void *ctx); +bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); + +static inline void *u32_as_hash_field(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +static inline bool hashmap__empty(struct hashmap *map) +{ + return map ? 
hashmap__size(map) == 0 : true; +} + #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 0085039d96103..2647603c5e5d7 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -17,6 +17,7 @@ #include #include +#include #include "json_writer.h" #include "main.h" @@ -56,7 +57,7 @@ const char * const map_type_name[] = { const size_t map_type_name_size = ARRAY_SIZE(map_type_name); -static struct pinned_obj_table map_table; +static struct hashmap *map_table; static bool map_is_per_cpu(__u32 type) { @@ -537,15 +538,14 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hash_empty(map_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(map_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(map_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(map_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } @@ -612,13 +612,12 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) } close(fd); - if (!hash_empty(map_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(map_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(map_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(map_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } printf("\n"); @@ -697,8 +696,13 @@ static int do_show(int argc, char **argv) int fd; if (show_pinned) { - hash_init(map_table.table); - build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + map_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!map_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(map_table, BPF_OBJ_MAP); } build_obj_refs_table(&refs_table, BPF_OBJ_MAP); @@ -747,7 +751,7 @@ static int do_show(int argc, char **argv) delete_obj_refs_table(&refs_table); if (show_pinned) - delete_pinned_obj_table(&map_table); + delete_pinned_obj_table(map_table); return errno == ENOENT ? 
0 : -1; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 48c2fa4d068e5..8fce78ce6f8b3 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -84,7 +85,7 @@ static const char * const attach_type_strings[] = { [__MAX_BPF_ATTACH_TYPE] = NULL, }; -static struct pinned_obj_table prog_table; +static struct hashmap *prog_table; static enum bpf_attach_type parse_attach_type(const char *str) { @@ -418,15 +419,14 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hash_empty(prog_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(prog_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(prog_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(prog_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } @@ -490,13 +490,12 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); - if (!hash_empty(prog_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(prog_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(prog_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(prog_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } if (info->btf_id) @@ -570,8 +569,13 @@ static int do_show(int argc, char **argv) int fd; if (show_pinned) { - hash_init(prog_table.table); - build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + prog_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!prog_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(prog_table, BPF_OBJ_PROG); } build_obj_refs_table(&refs_table, BPF_OBJ_PROG); @@ -618,7 +622,7 @@ static int do_show(int argc, char **argv) delete_obj_refs_table(&refs_table); if (show_pinned) - delete_pinned_obj_table(&prog_table); + delete_pinned_obj_table(prog_table); return err; } From 2828d0d75b738c9cd98080c530b7b7ea90b279dd Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 23 Oct 2021 21:51:53 +0100 Subject: [PATCH 122/181] bpftool: Switch to libbpf's hashmap for programs/maps in BTF listing In order to show BPF programs and maps using BTF objects when the latter are being listed, bpftool creates hash maps to store all relevant items. This commit is part of a set that transitions from the kernel's hash map implementation to the one coming with libbpf. The motivation is to make bpftool less dependent of kernel headers, to ease the path to a potential out-of-tree mirror, like libbpf has. This commit focuses on the two hash maps used by bpftool when listing BTF objects to store references to programs and maps, and convert them to the libbpf's implementation. 
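The underlying libbpf hashmap pattern, sketched as a standalone toy program (not bpftool code; it assumes libbpf's internal hashmap.h is on the include path and that keys and values are plain pointer-sized scalars, as in the conversions above):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include "hashmap.h"	/* libbpf's internal tools/lib/bpf/hashmap.h */

/* the u32 object ID itself serves as both hash and key */
static size_t hash_id(const void *key, void *ctx)
{
	return (size_t)key;
}

static bool equal_id(const void *k1, const void *k2, void *ctx)
{
	return k1 == k2;
}

static void *id_key(uint32_t id)
{
	return (void *)(uintptr_t)id;	/* pack the ID into the pointer-sized key */
}

int main(void)
{
	struct hashmap *map;
	struct hashmap_entry *entry;

	map = hashmap__new(hash_id, equal_id, NULL);
	if (!map)
		return 1;

	/* append keeps duplicate keys: one BTF ID may map to several object IDs */
	hashmap__append(map, id_key(42), (void *)(uintptr_t)1);
	hashmap__append(map, id_key(42), (void *)(uintptr_t)7);

	/* visit every entry stored under a given key */
	hashmap__for_each_key_entry(map, entry, id_key(42))
		printf("btf_id 42 -> obj_id %u\n",
		       (unsigned int)(uintptr_t)entry->value);

	hashmap__free(map);
	return 0;
}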
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211023205154.6710-5-quentin@isovalent.com --- tools/bpf/bpftool/btf.c | 125 ++++++++++++++++++--------------------- tools/bpf/bpftool/main.h | 5 ++ 2 files changed, 62 insertions(+), 68 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 0cd769adac662..11dd31b6e7304 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -8,14 +8,16 @@ #include #include #include -#include -#include -#include #include #include #include #include +#include +#include +#include +#include + #include "json_writer.h" #include "main.h" @@ -40,14 +42,9 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_DECL_TAG] = "DECL_TAG", }; -struct btf_attach_table { - DECLARE_HASHTABLE(table, 16); -}; - struct btf_attach_point { __u32 obj_id; __u32 btf_id; - struct hlist_node hash; }; static const char *btf_int_enc_str(__u8 encoding) @@ -645,21 +642,8 @@ static int btf_parse_fd(int *argc, char ***argv) return fd; } -static void delete_btf_table(struct btf_attach_table *tab) -{ - struct btf_attach_point *obj; - struct hlist_node *tmp; - - unsigned int bkt; - - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - hash_del(&obj->hash); - free(obj); - } -} - static int -build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, +build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, void *info, __u32 *len) { static const char * const names[] = { @@ -667,7 +651,6 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; - struct btf_attach_point *obj_node; __u32 btf_id, id = 0; int err; int fd; @@ -741,28 +724,25 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, if (!btf_id) continue; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { - p_err("failed to allocate memory: %s", strerror(errno)); - err = -ENOMEM; + err = hashmap__append(tab, u32_as_hash_field(btf_id), + u32_as_hash_field(id)); + if (err) { + p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", + btf_id, id, strerror(errno)); goto err_free; } - - obj_node->obj_id = id; - obj_node->btf_id = btf_id; - hash_add(tab->table, &obj_node->hash, obj_node->btf_id); } return 0; err_free: - delete_btf_table(tab); + hashmap__free(tab); return err; } static int -build_btf_tables(struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) +build_btf_tables(struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { struct bpf_prog_info prog_info; __u32 prog_len = sizeof(prog_info); @@ -778,7 +758,7 @@ build_btf_tables(struct btf_attach_table *btf_prog_table, err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info, &map_len); if (err) { - delete_btf_table(btf_prog_table); + hashmap__free(btf_prog_table); return err; } @@ -787,10 +767,10 @@ build_btf_tables(struct btf_attach_table *btf_prog_table, static void show_btf_plain(struct bpf_btf_info *info, int fd, - struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) + struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { - struct btf_attach_point *obj; + struct hashmap_entry *entry; const char *name = u64_to_ptr(info->name); int n; @@ -804,18 +784,19 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("size %uB", info->btf_size); n = 0; - hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) { - if (obj->btf_id == info->id) 
- printf("%s%u", n++ == 0 ? " prog_ids " : ",", - obj->obj_id); + hashmap__for_each_key_entry(btf_prog_table, entry, + u32_as_hash_field(info->id)) { + printf("%s%u", n++ == 0 ? " prog_ids " : ",", + hash_field_as_u32(entry->value)); } n = 0; - hash_for_each_possible(btf_map_table->table, obj, hash, info->id) { - if (obj->btf_id == info->id) - printf("%s%u", n++ == 0 ? " map_ids " : ",", - obj->obj_id); + hashmap__for_each_key_entry(btf_map_table, entry, + u32_as_hash_field(info->id)) { + printf("%s%u", n++ == 0 ? " map_ids " : ",", + hash_field_as_u32(entry->value)); } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); @@ -823,10 +804,10 @@ show_btf_plain(struct bpf_btf_info *info, int fd, static void show_btf_json(struct bpf_btf_info *info, int fd, - struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) + struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { - struct btf_attach_point *obj; + struct hashmap_entry *entry; const char *name = u64_to_ptr(info->name); jsonw_start_object(json_wtr); /* btf object */ @@ -835,19 +816,17 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_name(json_wtr, "prog_ids"); jsonw_start_array(json_wtr); /* prog_ids */ - hash_for_each_possible(btf_prog_table->table, obj, hash, - info->id) { - if (obj->btf_id == info->id) - jsonw_uint(json_wtr, obj->obj_id); + hashmap__for_each_key_entry(btf_prog_table, entry, + u32_as_hash_field(info->id)) { + jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); } jsonw_end_array(json_wtr); /* prog_ids */ jsonw_name(json_wtr, "map_ids"); jsonw_start_array(json_wtr); /* map_ids */ - hash_for_each_possible(btf_map_table->table, obj, hash, - info->id) { - if (obj->btf_id == info->id) - jsonw_uint(json_wtr, obj->obj_id); + hashmap__for_each_key_entry(btf_map_table, entry, + u32_as_hash_field(info->id)) { + jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); } jsonw_end_array(json_wtr); /* map_ids */ @@ -862,8 +841,8 @@ show_btf_json(struct bpf_btf_info *info, int fd, } static int -show_btf(int fd, struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) +show_btf(int fd, struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { struct bpf_btf_info info; __u32 len = sizeof(info); @@ -900,8 +879,8 @@ show_btf(int fd, struct btf_attach_table *btf_prog_table, static int do_show(int argc, char **argv) { - struct btf_attach_table btf_prog_table; - struct btf_attach_table btf_map_table; + struct hashmap *btf_prog_table; + struct hashmap *btf_map_table; int err, fd = -1; __u32 id = 0; @@ -917,9 +896,19 @@ static int do_show(int argc, char **argv) return BAD_ARG(); } - hash_init(btf_prog_table.table); - hash_init(btf_map_table.table); - err = build_btf_tables(&btf_prog_table, &btf_map_table); + btf_prog_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + btf_map_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!btf_prog_table || !btf_map_table) { + hashmap__free(btf_prog_table); + hashmap__free(btf_map_table); + if (fd >= 0) + close(fd); + p_err("failed to create hashmap for object references"); + return -1; + } + err = build_btf_tables(btf_prog_table, btf_map_table); if (err) { if (fd >= 0) close(fd); @@ -928,7 +917,7 @@ static int do_show(int argc, char **argv) build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { - err = show_btf(fd, &btf_prog_table, &btf_map_table); + err = show_btf(fd, btf_prog_table, btf_map_table); close(fd); goto exit_free; } @@ -960,7 
+949,7 @@ static int do_show(int argc, char **argv) break; } - err = show_btf(fd, &btf_prog_table, &btf_map_table); + err = show_btf(fd, btf_prog_table, btf_map_table); close(fd); if (err) break; @@ -970,8 +959,8 @@ static int do_show(int argc, char **argv) jsonw_end_array(json_wtr); /* root array */ exit_free: - delete_btf_table(&btf_prog_table); - delete_btf_table(&btf_map_table); + hashmap__free(btf_prog_table); + hashmap__free(btf_map_table); delete_obj_refs_table(&refs_table); return err; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0d64e39573f22..f6f2f951b1938 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -256,6 +256,11 @@ static inline void *u32_as_hash_field(__u32 x) return (void *)(uintptr_t)x; } +static inline __u32 hash_field_as_u32(const void *x) +{ + return (__u32)(uintptr_t)x; +} + static inline bool hashmap__empty(struct hashmap *map) { return map ? hashmap__size(map) == 0 : true; From d6699f8e0f834b40db35466f704705ae757be11a Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sat, 23 Oct 2021 21:51:54 +0100 Subject: [PATCH 123/181] bpftool: Switch to libbpf's hashmap for PIDs/names references In order to show PIDs and names for processes holding references to BPF programs, maps, links, or BTF objects, bpftool creates hash maps to store all relevant information. This commit is part of a set that transitions from the kernel's hash map implementation to the one coming with libbpf. The motivation is to make bpftool less dependent of kernel headers, to ease the path to a potential out-of-tree mirror, like libbpf has. This is the third and final step of the transition, in which we convert the hash maps used for storing the information about the processes holding references to BPF objects (programs, maps, links, BTF), and at last we drop the inclusion of tools/include/linux/hashtable.h. Note: Checkpatch complains about the use of __weak declarations, and the missing empty lines after the bunch of empty function declarations when compiling without the BPF skeletons (none of these were introduced in this patch). We want to keep things as they are, and the reports should be safe to ignore. 
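One detail worth keeping in mind with this conversion, sketched below under the same assumptions as the hashmap example above (toy code, hypothetical value type): hashmap__free() only releases the map's own buckets, so heap-allocated values such as the per-ID reference records still have to be walked and freed explicitly, which is what delete_obj_refs_table() and delete_pinned_obj_table() do.

#include <stdlib.h>
#include "hashmap.h"	/* libbpf's internal header, as above */

struct id_refs {	/* hypothetical value type, similar in spirit to struct obj_refs */
	int ref_cnt;
	int *pids;
};

static void delete_refs_map(struct hashmap *map)
{
	struct hashmap_entry *entry;
	size_t bkt;

	if (!map)
		return;

	/* free every heap-allocated value before dropping the map itself */
	hashmap__for_each_entry(map, entry, bkt) {
		struct id_refs *refs = entry->value;

		free(refs->pids);
		free(refs);
	}
	hashmap__free(map);
}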
Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211023205154.6710-6-quentin@isovalent.com --- tools/bpf/bpftool/btf.c | 7 ++-- tools/bpf/bpftool/link.c | 6 +-- tools/bpf/bpftool/main.c | 5 ++- tools/bpf/bpftool/main.h | 17 +++----- tools/bpf/bpftool/map.c | 6 +-- tools/bpf/bpftool/pids.c | 90 +++++++++++++++++++++++----------------- tools/bpf/bpftool/prog.c | 6 +-- 7 files changed, 72 insertions(+), 65 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 11dd31b6e7304..015d2758f8264 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -797,7 +796,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd, hash_field_as_u32(entry->value)); } - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); } @@ -830,7 +829,7 @@ show_btf_json(struct bpf_btf_info *info, int fd, } jsonw_end_array(json_wtr); /* map_ids */ - emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + emit_obj_refs_json(refs_table, info->id, json_wtr); /* pids */ jsonw_bool_field(json_wtr, "kernel", info->kernel_btf); @@ -961,7 +960,7 @@ static int do_show(int argc, char **argv) exit_free: hashmap__free(btf_prog_table); hashmap__free(btf_map_table); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); return err; } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 404cc5459c6bc..2c258db0d3521 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -170,7 +170,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -253,7 +253,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) u32_as_hash_field(info->id)) printf("\n\tpinned %s", (char *)entry->value); } - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); @@ -352,7 +352,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); if (show_pinned) delete_pinned_obj_table(link_table); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 7a33f0e6da281..28237d7cef67f 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -10,8 +10,9 @@ #include #include -#include #include +#include +#include #include "main.h" @@ -31,7 +32,7 @@ bool verifier_logs; bool relaxed_maps; bool use_loader; struct btf *base_btf; -struct obj_refs_table refs_table; +struct hashmap *refs_table; static void __noreturn clean_and_exit(int i) { diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index f6f2f951b1938..383835c2604d0 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -92,7 +91,7 @@ extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; extern struct btf *base_btf; -extern struct obj_refs_table refs_table; +extern struct hashmap *refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -106,18 +105,12 @@ void set_max_rlimit(void); int mount_tracefs(const char *target); -struct obj_refs_table { - 
DECLARE_HASHTABLE(table, 16); -}; - struct obj_ref { int pid; char comm[16]; }; struct obj_refs { - struct hlist_node node; - __u32 id; int ref_cnt; struct obj_ref *refs; }; @@ -128,12 +121,12 @@ struct bpf_line_info; int build_pinned_obj_table(struct hashmap *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct hashmap *table); -__weak int build_obj_refs_table(struct obj_refs_table *table, +__weak int build_obj_refs_table(struct hashmap **table, enum bpf_obj_type type); -__weak void delete_obj_refs_table(struct obj_refs_table *table); -__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, +__weak void delete_obj_refs_table(struct hashmap *table); +__weak void emit_obj_refs_json(struct hashmap *table, __u32 id, json_writer_t *json_wtr); -__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, +__weak void emit_obj_refs_plain(struct hashmap *table, __u32 id, const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 2647603c5e5d7..cae1f11192969 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -549,7 +549,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -637,7 +637,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); return 0; @@ -748,7 +748,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); if (show_pinned) delete_pinned_obj_table(map_table); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 477e55d59c348..56b598eee043a 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -6,35 +6,37 @@ #include #include #include + #include +#include #include "main.h" #include "skeleton/pid_iter.h" #ifdef BPFTOOL_WITHOUT_SKELETONS -int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) { return -ENOTSUP; } -void delete_obj_refs_table(struct obj_refs_table *table) {} -void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} -void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {} +void delete_obj_refs_table(struct hashmap *map) {} +void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) {} +void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) {} #else /* BPFTOOL_WITHOUT_SKELETONS */ #include "pid_iter.skel.h" -static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) +static void add_ref(struct hashmap *map, struct pid_iter_entry *e) { + struct hashmap_entry *entry; struct obj_refs *refs; struct obj_ref *ref; + int err, i; void *tmp; - int i; - hash_for_each_possible(table->table, refs, node, e->id) { - if (refs->id != e->id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) { + refs = entry->value; for (i = 0; i < refs->ref_cnt; i++) { if (refs->refs[i].pid == e->pid) @@ -64,7 
+66,6 @@ static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) return; } - refs->id = e->id; refs->refs = malloc(sizeof(*refs->refs)); if (!refs->refs) { free(refs); @@ -76,7 +77,11 @@ static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); refs->ref_cnt = 1; - hash_add(table->table, &refs->node, e->id); + + err = hashmap__append(map, u32_as_hash_field(e->id), refs); + if (err) + p_err("failed to append entry to hashmap for ID %u: %s", + e->id, strerror(errno)); } static int __printf(2, 0) @@ -87,7 +92,7 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, return 0; } -int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) { struct pid_iter_entry *e; char buf[4096 / sizeof(*e) * sizeof(*e)]; @@ -95,7 +100,11 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) int err, ret, fd = -1, i; libbpf_print_fn_t default_print; - hash_init(table->table); + *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); + if (!*map) { + p_err("failed to create hashmap for PID references"); + return -1; + } set_max_rlimit(); skel = pid_iter_bpf__open(); @@ -151,7 +160,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) e = (void *)buf; for (i = 0; i < ret; i++, e++) { - add_ref(table, e); + add_ref(*map, e); } } err = 0; @@ -162,39 +171,44 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) return err; } -void delete_obj_refs_table(struct obj_refs_table *table) +void delete_obj_refs_table(struct hashmap *map) { - struct obj_refs *refs; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; + + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) { + struct obj_refs *refs = entry->value; - hash_for_each_safe(table->table, bkt, tmp, refs, node) { - hash_del(&refs->node); free(refs->refs); free(refs); } + + hashmap__free(map); } -void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, +void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) { - struct obj_refs *refs; - struct obj_ref *ref; - int i; + struct hashmap_entry *entry; - if (hash_empty(table->table)) + if (hashmap__empty(map)) return; - hash_for_each_possible(table->table, refs, node, id) { - if (refs->id != id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { + struct obj_refs *refs = entry->value; + int i; + if (refs->ref_cnt == 0) break; jsonw_name(json_writer, "pids"); jsonw_start_array(json_writer); for (i = 0; i < refs->ref_cnt; i++) { - ref = &refs->refs[i]; + struct obj_ref *ref = &refs->refs[i]; + jsonw_start_object(json_writer); jsonw_int_field(json_writer, "pid", ref->pid); jsonw_string_field(json_writer, "comm", ref->comm); @@ -205,24 +219,24 @@ void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, } } -void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) +void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) { - struct obj_refs *refs; - struct obj_ref *ref; - int i; + struct hashmap_entry *entry; - if (hash_empty(table->table)) + if (hashmap__empty(map)) return; - hash_for_each_possible(table->table, refs, node, id) { - if (refs->id != id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { + struct obj_refs *refs = 
entry->value; + int i; + if (refs->ref_cnt == 0) break; printf("%s", prefix); for (i = 0; i < refs->ref_cnt; i++) { - ref = &refs->refs[i]; + struct obj_ref *ref = &refs->refs[i]; + printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid); } break; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8fce78ce6f8b3..515d229526026 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -430,7 +430,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); show_prog_metadata(fd, info->nr_map_ids); @@ -501,7 +501,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); @@ -619,7 +619,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); if (show_pinned) delete_pinned_obj_table(prog_table); From de5d0dcef602de39070c31c7e56c58249c56ba37 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Oct 2021 15:45:28 -0700 Subject: [PATCH 124/181] libbpf: Fix off-by-one bug in bpf_core_apply_relo() Fix instruction index validity check which has off-by-one error. Fixes: 3ee4f5335511 ("libbpf: Split bpf_core_apply_relo() into bpf_program independent helper.") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211025224531.1088894-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 604abe00785f8..e27a249d46fb9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5405,7 +5405,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * relocated, so it's enough to just subtract in-section offset */ insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx > prog->insns_cnt) + if (insn_idx >= prog->insns_cnt) return -EINVAL; insn = &prog->insns[insn_idx]; From 65a7fa2e4e5381d205d3b0098da0fc8471fbbfb6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Oct 2021 15:45:29 -0700 Subject: [PATCH 125/181] libbpf: Add ability to fetch bpf_program's underlying instructions Add APIs providing read-only access to bpf_program BPF instructions ([0]). This is useful for diagnostics purposes, but it also allows a cleaner support for cloning BPF programs after libbpf did all the FD resolution and CO-RE relocations, subprog instructions appending, etc. Currently, cloning BPF program is possible only through hijacking a half-broken bpf_program__set_prep() API, which doesn't really work well for anything but most primitive programs. For instance, set_prep() API doesn't allow adjusting BPF program load parameters which are necessary for loading fentry/fexit BPF programs (the case where BPF program cloning is a necessity if doing some sort of mass-attachment functionality). Given bpf_program__set_prep() API is set to be deprecated, having a cleaner alternative is a must. libbpf internally already keeps track of linear array of struct bpf_insn, so it's not hard to expose it. 
The only gotcha is that libbpf previously freed instructions array during bpf_object load time, which would make this API much less useful overall, because in between bpf_object__open() and bpf_object__load() a lot of changes to instructions are done by libbpf. So this patch makes libbpf hold onto prog->insns array even after BPF program loading. I think this is a small price for added functionality and improved introspection of BPF program code. See retsnoop PR ([1]) for how it can be used in practice and code savings compared to relying on bpf_program__set_prep(). [0] Closes: https://github.com/libbpf/libbpf/issues/298 [1] https://github.com/anakryiko/retsnoop/pull/1 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211025224531.1088894-3-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 12 ++++++++++-- tools/lib/bpf/libbpf.h | 36 ++++++++++++++++++++++++++++++++++-- tools/lib/bpf/libbpf.map | 2 ++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e27a249d46fb9..dc86ad24dfcb8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6653,8 +6653,6 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) out: if (err) pr_warn("failed to load program '%s'\n", prog->name); - zfree(&prog->insns); - prog->insns_cnt = 0; return libbpf_err(err); } @@ -8143,6 +8141,16 @@ size_t bpf_program__size(const struct bpf_program *prog) return prog->insns_cnt * BPF_INSN_SZ; } +const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) +{ + return prog->insns; +} + +size_t bpf_program__insn_cnt(const struct bpf_program *prog) +{ + return prog->insns_cnt; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 89ca9c83ed4ed..c6bcc5b989062 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -226,6 +226,40 @@ LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload /* returns program size in bytes */ LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); +struct bpf_insn; + +/** + * @brief **bpf_program__insns()** gives read-only access to BPF program's + * underlying BPF instructions. + * @param prog BPF program for which to return instructions + * @return a pointer to an array of BPF instructions that belong to the + * specified BPF program + * + * Returned pointer is always valid and not NULL. Number of `struct bpf_insn` + * pointed to can be fetched using **bpf_program__insn_cnt()** API. + * + * Keep in mind, libbpf can modify and append/delete BPF program's + * instructions as it processes BPF object file and prepares everything for + * uploading into the kernel. So depending on the point in BPF object + * lifetime, **bpf_program__insns()** can return different sets of + * instructions. As an example, during BPF object load phase BPF program + * instructions will be CO-RE-relocated, BPF subprograms instructions will be + * appended, ldimm64 instructions will have FDs embedded, etc. So instructions + * returned before **bpf_object__load()** and after it might be quite + * different. + */ +LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); +/** + * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s + * that form specified BPF program. 
+ * @param prog BPF program for which to return number of BPF instructions + * + * See **bpf_program__insns()** documentation for notes on how libbpf can + * change instructions and their count during different phases of + * **bpf_object** lifetime. + */ +LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); @@ -365,8 +399,6 @@ LIBBPF_API struct bpf_link * bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); -struct bpf_insn; - /* * Libbpf allows callers to adjust BPF programs before being loaded * into kernel. One program in an object file can be transformed into diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 116964a29e44c..15239c05659cd 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -393,6 +393,8 @@ LIBBPF_0.6.0 { bpf_object__next_program; bpf_object__prev_map; bpf_object__prev_program; + bpf_program__insn_cnt; + bpf_program__insns; btf__add_btf; btf__add_decl_tag; btf__raw_data; From e21d585cb3db65a207cd338c74b9886090ef1ceb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Oct 2021 15:45:30 -0700 Subject: [PATCH 126/181] libbpf: Deprecate multi-instance bpf_program APIs Schedule deprecation of a set of APIs that are related to multi-instance bpf_programs: - bpf_program__set_prep() ([0]); - bpf_program__{set,unset}_instance() ([1]); - bpf_program__nth_fd(). These APIs are obscure, very niche, and don't seem to be used much in practice. bpf_program__set_prep() is pretty useless for anything but the simplest BPF programs, as it doesn't allow to adjust BPF program load attributes, among other things. In short, it already bitrotted and will bitrot some more if not removed. With bpf_program__insns() API, which gives access to post-processed BPF program instructions of any given entry-point BPF program, it's now possible to do whatever necessary adjustments were possible with set_prep() API before, but also more. Given any such use case is automatically an advanced use case, requiring users to stick to low-level bpf_prog_load() APIs and managing their own prog FDs is reasonable. 
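To make the migration path concrete, here is a rough, hypothetical sketch of re-loading an already-processed program by hand via the low-level loading API, which is roughly what former set_prep() users would do instead; the program type and license below are placeholders chosen for the example:

  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>

  /* load one extra, independent instance of an already-opened and
   * already-processed program; the caller owns the returned FD
   */
  static int clone_prog_fd(const struct bpf_program *prog)
  {
          static char log[64 * 1024];

          return bpf_load_program(BPF_PROG_TYPE_KPROBE,
                                  bpf_program__insns(prog),
                                  bpf_program__insn_cnt(prog),
                                  "GPL", 0, log, sizeof(log));
  }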
[0] Closes: https://github.com/libbpf/libbpf/issues/299 [1] Closes: https://github.com/libbpf/libbpf/issues/300 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211025224531.1088894-4-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 22 +++++++++++++--------- tools/lib/bpf/libbpf.h | 6 +++++- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dc86ad24dfcb8..4b21586575071 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7358,8 +7358,7 @@ static int check_path(const char *path) return err; } -int bpf_program__pin_instance(struct bpf_program *prog, const char *path, - int instance) +static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) { char *cp, errmsg[STRERR_BUFSIZE]; int err; @@ -7394,8 +7393,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, - int instance) +static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) { int err; @@ -7423,6 +7421,12 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return 0; } +__attribute__((alias("bpf_program_pin_instance"))) +int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); + +__attribute__((alias("bpf_program_unpin_instance"))) +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); + int bpf_program__pin(struct bpf_program *prog, const char *path) { int i, err; @@ -7447,7 +7451,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program__pin_instance(prog, path, 0); + return bpf_program_pin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7463,7 +7467,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) goto err_unpin; } - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program_pin_instance(prog, buf, i); if (err) goto err_unpin; } @@ -7481,7 +7485,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) else if (len >= PATH_MAX) continue; - bpf_program__unpin_instance(prog, buf, i); + bpf_program_unpin_instance(prog, buf, i); } rmdir(path); @@ -7509,7 +7513,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program__unpin_instance(prog, path, 0); + return bpf_program_unpin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7522,7 +7526,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) else if (len >= PATH_MAX) return libbpf_err(-ENAMETOOLONG); - err = bpf_program__unpin_instance(prog, buf, i); + err = bpf_program_unpin_instance(prog, buf, i); if (err) return err; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c6bcc5b989062..449eeed80be66 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -263,9 +263,11 @@ LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int 
bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); @@ -427,7 +429,7 @@ bpf_program__attach_iter(const struct bpf_program *prog, * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). */ - +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") struct bpf_prog_prep_result { /* * If not NULL, load new instruction array. @@ -456,9 +458,11 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, struct bpf_prog_prep_result *res); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, bpf_program_prep_t prep); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* From c4813e969ac471af730902377a2656b6b1b92c4d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Oct 2021 15:45:31 -0700 Subject: [PATCH 127/181] libbpf: Deprecate ambiguously-named bpf_program__size() API The name of the API doesn't convey clearly that this size is in number of bytes (there needed to be a separate comment to make this clear in libbpf.h). Further, measuring the size of BPF program in bytes is not exactly the best fit, because BPF programs always consist of 8-byte instructions. As such, bpf_program__insn_cnt() is a better alternative in pretty much any imaginable case. So schedule bpf_program__size() deprecation starting from v0.7 and it will be removed in libbpf 1.0. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211025224531.1088894-5-andrii@kernel.org --- tools/lib/bpf/libbpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 449eeed80be66..e1900819bfab1 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -224,6 +224,7 @@ LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); /* returns program size in bytes */ +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead") LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); struct bpf_insn; From 45f2bebc8079788f62f22d9e8b2819afb1789d7b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:26 +0200 Subject: [PATCH 128/181] libbpf: Fix endianness detection in BPF_CORE_READ_BITFIELD_PROBED() __BYTE_ORDER is supposed to be defined by a libc, and __BYTE_ORDER__ - by a compiler. bpf_core_read.h checks __BYTE_ORDER == __LITTLE_ENDIAN, which is true if neither are defined, leading to incorrect behavior on big-endian hosts if libc headers are not included, which is often the case. 
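To make the failure mode concrete: without a libc header providing the __BYTE_ORDER family, both identifiers in the old check are undefined, the preprocessor evaluates them as 0, and the little-endian branch is taken even on a big-endian host. The compiler-provided macros do not have this problem:

  #if __BYTE_ORDER == __LITTLE_ENDIAN            /* 0 == 0 when undefined: always true */
  /* little-endian bitfield handling silently chosen on every host */
  #endif

  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  /* predefined by the compiler */
  /* taken only on genuinely little-endian targets */
  #endif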
Fixes: ee26dade0e3b ("libbpf: Add support for relocatable bitfields") Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-2-iii@linux.ibm.com --- tools/lib/bpf/bpf_core_read.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 09ebe3db5f2f8..e4aa9996a5501 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -40,7 +40,7 @@ enum bpf_enum_value_kind { #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ (void *)dst, \ From 3930198dc9a0720a2b6561c67e55859ec51b73f9 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:27 +0200 Subject: [PATCH 129/181] libbpf: Use __BYTE_ORDER__ Use the compiler-defined __BYTE_ORDER__ instead of the libc-defined __BYTE_ORDER for consistency. Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-3-iii@linux.ibm.com --- tools/lib/bpf/btf.c | 4 ++-- tools/lib/bpf/btf_dump.c | 8 ++++---- tools/lib/bpf/libbpf.c | 4 ++-- tools/lib/bpf/linker.c | 12 ++++++------ tools/lib/bpf/relo_core.c | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ef924fc2c9114..0c628c33e23b7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -538,9 +538,9 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) static bool is_host_big_endian(void) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return false; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return true; #else # error "Unrecognized __BYTE_ORDER__" diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 8e05ab44c22ad..17db62b5002e8 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1576,11 +1576,11 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, /* Bitfield value retrieval is done in two steps; first relevant bytes are * stored in num, then we left/right shift num to eliminate irrelevant bits. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ for (i = t->size - 1; i >= 0; i--) num = num * 256 + bytes[i]; nr_copy_bits = bit_sz + bits_offset; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ for (i = 0; i < t->size; i++) num = num * 256 + bytes[i]; nr_copy_bits = t->size * 8 - bits_offset; @@ -1700,10 +1700,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* avoid use of __int128 as some 32-bit platforms do not * support it. 
*/ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ lsi = ints[0]; msi = ints[1]; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ lsi = ints[1]; msi = ints[0]; #else diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b21586575071..2fbed2d4a6454 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1299,10 +1299,10 @@ static int bpf_object__elf_init(struct bpf_object *obj) static int bpf_object__check_endianness(struct bpf_object *obj) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) return 0; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) return 0; #else diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 7bf658fbda800..ce0800e61dc71 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -323,12 +323,12 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; #else -#error "Unknown __BYTE_ORDER" +#error "Unknown __BYTE_ORDER__" #endif /* STRTAB */ @@ -538,12 +538,12 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ const int host_endianness = ELFDATA2MSB; #else -#error "Unknown __BYTE_ORDER" +#error "Unknown __BYTE_ORDER__" #endif int err = 0; Elf_Scn *scn; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 4016ed492d0c2..b5b8956a1be85 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -662,7 +662,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = true; /* signedness is never ambiguous */ break; case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); From 06fca841fb64c9ed499a3575a530014268d0251a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:28 +0200 Subject: [PATCH 130/181] selftests/bpf: Use __BYTE_ORDER__ Use the compiler-defined __BYTE_ORDER__ instead of the libc-defined __BYTE_ORDER for consistency. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-4-iii@linux.ibm.com --- .../testing/selftests/bpf/prog_tests/btf_endian.c | 6 +++--- tools/testing/selftests/bpf/test_sysctl.c | 4 ++-- tools/testing/selftests/bpf/verifier/ctx_skb.c | 14 +++++++------- tools/testing/selftests/bpf/verifier/lwt.c | 2 +- .../bpf/verifier/perf_event_sample_period.c | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c index 2653cc482df46..8afbf3d0b89a2 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -7,12 +7,12 @@ #include void test_btf_endian() { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ enum btf_endianness endian = BTF_LITTLE_ENDIAN; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ enum btf_endianness endian = BTF_BIG_ENDIAN; #else -#error "Unrecognized __BYTE_ORDER" +#error "Unrecognized __BYTE_ORDER__" #endif enum btf_endianness swap_endian = 1 - endian; struct btf *btf = NULL, *swap_btf = NULL; diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a20a919244c0b..a3bb6d399daa0 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -124,7 +124,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:write sysctl:write read ok narrow", .insns = { /* u64 w = (u16)write & 1; */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, write)), #else @@ -184,7 +184,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), #else diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 9e1a30b941972..83cecfbd67394 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -502,7 +502,7 @@ "check skb->hash byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -537,7 +537,7 @@ "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -646,7 +646,7 @@ "check skb->hash half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -661,7 +661,7 @@ "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), #else @@ -676,7 +676,7 @@ "check skb->hash half load not permitted, unaligned 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 
0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), #else @@ -693,7 +693,7 @@ "check skb->hash half load not permitted, unaligned 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -951,7 +951,7 @@ "check skb->data half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, data)), #else diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c index 2cab6a3966bb4..5c8944d0b0910 100644 --- a/tools/testing/selftests/bpf/verifier/lwt.c +++ b/tools/testing/selftests/bpf/verifier/lwt.c @@ -174,7 +174,7 @@ "check skb->tc_classid half load not permitted for lwt prog", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, tc_classid)), #else diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c index 471c1a5950d84..d8a9b1a1f9a25 100644 --- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c +++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c @@ -2,7 +2,7 @@ "check bpf_perf_event_data->sample_period byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -18,7 +18,7 @@ "check bpf_perf_event_data->sample_period half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -34,7 +34,7 @@ "check bpf_perf_event_data->sample_period word load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else From 14e6cac771355dd5eb0b2cd66164ebd34a1621a7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:29 +0200 Subject: [PATCH 131/181] samples: seccomp: Use __BYTE_ORDER__ Use the compiler-defined __BYTE_ORDER__ instead of the libc-defined __BYTE_ORDER for consistency. Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-5-iii@linux.ibm.com --- samples/seccomp/bpf-helper.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 0cc9816fe8e82..417e48a4c4df2 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -62,9 +62,9 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #define EXPAND(...) __VA_ARGS__ /* Ensure that we load the logically correct offset. 
*/ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) #else #error "Unknown endianness" @@ -85,10 +85,10 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #elif __BITS_PER_LONG == 64 /* Ensure that we load the logically correct offset. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define ENDIAN(_lo, _hi) _lo, _hi #define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define ENDIAN(_lo, _hi) _hi, _lo #define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) #endif From 3e7ed9cebb551ce9bfbf2985da9cdadd8186e1eb Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:30 +0200 Subject: [PATCH 132/181] selftests/seccomp: Use __BYTE_ORDER__ Use the compiler-defined __BYTE_ORDER__ instead of the libc-defined __BYTE_ORDER for consistency. Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-6-iii@linux.ibm.com --- tools/testing/selftests/seccomp/seccomp_bpf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1d64891e64923..d425688cf59c1 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -276,12 +276,12 @@ int seccomp(unsigned int op, unsigned int flags, void *args) } #endif -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) #else -#error "wut? Unknown __BYTE_ORDER?!" +#error "wut? Unknown __BYTE_ORDER__?!" #endif #define SIBLING_EXIT_UNKILLED 0xbadbeef From 2e2c6d3fb38344df92b63162a28ec09491cc0700 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:31 +0200 Subject: [PATCH 133/181] selftests/bpf: Fix test_core_reloc_mods on big-endian machines This is the same as commit d164dd9a5c08 ("selftests/bpf: Fix test_core_autosize on big-endian machines"), but for test_core_reloc_mods. Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-7-iii@linux.ibm.com --- tools/testing/selftests/bpf/progs/test_core_reloc_mods.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c index 8b533db4a7a5e..b2ded497572a1 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -42,7 +42,16 @@ struct core_reloc_mods { core_reloc_mods_substruct_t h; }; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) +#else +#define CORE_READ(dst, src) ({ \ + int __sz = sizeof(*(dst)) < sizeof(*(src)) ? 
sizeof(*(dst)) : \ + sizeof(*(src)); \ + bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \ + (const char *)(src) + sizeof(*(src)) - __sz); \ +}) +#endif SEC("raw_tracepoint/sys_enter") int test_core_mods(void *ctx) From b4e87072762d063f9c20ba0ab2120e1910fa379f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 25 Oct 2021 17:07:33 -0700 Subject: [PATCH 134/181] selftests/bpf: Skip all serial_test_get_branch_snapshot in vm Skipping the second half of the test is not enough to silent the warning in dmesg. Skip the whole test before we can either properly silent the warning in kernel, or fix LBR snapshot for VM. Fixes: 025bd7c753aa ("selftests/bpf: Add test for bpf_get_branch_snapshot") Fixes: aa67fdb46436 ("selftests/bpf: Skip the second half of get_branch_snapshot in vm") Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026000733.477714-1-songliubraving@fb.com --- .../bpf/prog_tests/get_branch_snapshot.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c index d6d70a359aeb5..81402e4439844 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -78,6 +78,12 @@ void serial_test_get_branch_snapshot(void) struct get_branch_snapshot *skel = NULL; int err; + /* Skip the test before we fix LBR snapshot for hypervisor. */ + if (is_hypervisor()) { + test__skip(); + return; + } + if (create_perf_events()) { test__skip(); /* system doesn't support LBR */ goto cleanup; @@ -107,16 +113,6 @@ void serial_test_get_branch_snapshot(void) goto cleanup; } - if (is_hypervisor()) { - /* As of today, LBR in hypervisor cannot be stopped before - * too many entries are flushed. Skip the hit/waste test - * for now in hypervisor until we optimize the LBR in - * hypervisor. - */ - test__skip(); - goto cleanup; - } - ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr"); /* Given we stop LBR in software, we will waste a few entries. From 20d1b54a52bd9a1d5d23a45dab75993ce2f1d617 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 22 Oct 2021 16:48:14 -0700 Subject: [PATCH 135/181] selftests/bpf: Guess function end for test_get_branch_snapshot Function in modules could appear in /proc/kallsyms in random order. ffffffffa02608a0 t bpf_testmod_loop_test ffffffffa02600c0 t __traceiter_bpf_testmod_test_writable_bare ffffffffa0263b60 d __tracepoint_bpf_testmod_test_write_bare ffffffffa02608c0 T bpf_testmod_test_read ffffffffa0260d08 t __SCT__tp_func_bpf_testmod_test_writable_bare ffffffffa0263300 d __SCK__tp_func_bpf_testmod_test_read ffffffffa0260680 T bpf_testmod_test_write ffffffffa0260860 t bpf_testmod_test_mod_kfunc Therefore, we cannot reliably use kallsyms_find_next() to find the end of a function. Replace it with a simple guess (start + 128). This is good enough for this test. 
Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211022234814.318457-1-songliubraving@fb.com --- .../bpf/prog_tests/get_branch_snapshot.c | 7 ++-- tools/testing/selftests/bpf/trace_helpers.c | 36 ------------------- tools/testing/selftests/bpf/trace_helpers.h | 5 --- 3 files changed, 4 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c index 81402e4439844..3948da12a5284 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -97,9 +97,10 @@ void serial_test_get_branch_snapshot(void) if (!ASSERT_OK(err, "kallsyms_find")) goto cleanup; - err = kallsyms_find_next("bpf_testmod_loop_test", &skel->bss->address_high); - if (!ASSERT_OK(err, "kallsyms_find_next")) - goto cleanup; + /* Just a guess for the end of this function, as module functions + * in /proc/kallsyms could come in any order. + */ + skel->bss->address_high = skel->bss->address_low + 128; err = get_branch_snapshot__attach(skel); if (!ASSERT_OK(err, "get_branch_snapshot__attach")) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 5100a169b72b1..7b7f918eda776 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -118,42 +118,6 @@ int kallsyms_find(const char *sym, unsigned long long *addr) return err; } -/* find the address of the next symbol of the same type, this can be used - * to determine the end of a function. - */ -int kallsyms_find_next(const char *sym, unsigned long long *addr) -{ - char type, found_type, name[500]; - unsigned long long value; - bool found = false; - int err = 0; - FILE *f; - - f = fopen("/proc/kallsyms", "r"); - if (!f) - return -EINVAL; - - while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { - /* Different types of symbols in kernel modules are mixed - * in /proc/kallsyms. Only return the next matching type. - * Use tolower() for type so that 'T' matches 't'. - */ - if (found && found_type == tolower(type)) { - *addr = value; - goto out; - } - if (strcmp(name, sym) == 0) { - found = true; - found_type = tolower(type); - } - } - err = -ENOENT; - -out: - fclose(f); - return err; -} - void read_trace_pipe(void) { int trace_fd; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index bc8ed86105d94..d907b445524d5 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -16,11 +16,6 @@ long ksym_get_addr(const char *name); /* open kallsyms and find addresses on the fly, faster than load + search. */ int kallsyms_find(const char *sym, unsigned long long *addr); -/* find the address of the next symbol, this can be used to determine the - * end of a function - */ -int kallsyms_find_next(const char *sym, unsigned long long *addr); - void read_trace_pipe(void); ssize_t get_uprobe_offset(const void *addr, ssize_t base); From 689624f037ce219d42312534eff4dc470b54dec4 Mon Sep 17 00:00:00 2001 From: Joe Burton Date: Tue, 26 Oct 2021 22:35:28 +0000 Subject: [PATCH 136/181] libbpf: Deprecate bpf_objects_list Add a flag to `enum libbpf_strict_mode' to disable the global `bpf_objects_list', preventing race conditions when concurrent threads call bpf_object__open() or bpf_object__close(). bpf_object__next() will return NULL if this option is set. 
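For instance, a minimal usage sketch (array size and object path are arbitrary) of an application opting in and doing its own bookkeeping:

  libbpf_set_strict_mode(LIBBPF_STRICT_NO_OBJECT_LIST);

  struct bpf_object *my_objs[16];         /* application-owned tracking */
  int nr_objs = 0;

  my_objs[nr_objs++] = bpf_object__open_file("prog.bpf.o", NULL);
  /* bpf_object__next(NULL) now returns NULL instead of walking the
   * global, unsynchronized bpf_objects_list
   */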
Callers may achieve the same workflow by tracking bpf_objects in application code. [0] Closes: https://github.com/libbpf/libbpf/issues/293 Signed-off-by: Joe Burton Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026223528.413950-1-jevburton.kernel@gmail.com --- tools/lib/bpf/libbpf.c | 8 +++++++- tools/lib/bpf/libbpf.h | 3 ++- tools/lib/bpf/libbpf_legacy.h | 6 ++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2fbed2d4a6454..59d39ce9f375d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1148,6 +1148,7 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz, const char *obj_name) { + bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1188,7 +1189,8 @@ static struct bpf_object *bpf_object__new(const char *path, obj->loaded = false; INIT_LIST_HEAD(&obj->list); - list_add(&obj->list, &bpf_objects_list); + if (!strict) + list_add(&obj->list, &bpf_objects_list); return obj; } @@ -7935,6 +7937,10 @@ struct bpf_object * bpf_object__next(struct bpf_object *prev) { struct bpf_object *next; + bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); + + if (strict) + return NULL; if (!prev) next = list_first_entry(&bpf_objects_list, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e1900819bfab1..defabdbe7760f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -168,7 +168,8 @@ LIBBPF_API struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name); -LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead") +struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ (tmp) = bpf_object__next(pos); \ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 29ccafab11a8b..5ba5c9beccfa9 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -57,6 +57,12 @@ enum libbpf_strict_mode { * function name instead of section name. */ LIBBPF_STRICT_SEC_NAME = 0x04, + /* + * Disable the global 'bpf_objects_list'. Maintaining this list adds + * a race condition to bpf_object__open() and bpf_object__close(). + * Clients can maintain it on their own if it is valuable for them. + */ + LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, __LIBBPF_STRICT_LAST, }; From f941eadd8d6d4ee2f8c9aeab8e1da5e647533a7d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2021 14:41:31 -0700 Subject: [PATCH 137/181] bpf: Avoid races in __bpf_prog_run() for 32bit arches __bpf_prog_run() can run from non IRQ contexts, meaning it could be re entered if interrupted. This calls for the irq safe variant of u64_stats_update_{begin|end}, or risk a deadlock. This patch is a nop on 64bit arches, fortunately. syzbot report: WARNING: inconsistent lock state 5.12.0-rc3-syzkaller #0 Not tainted -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. 
udevd/4013 [HC0[0]:SC0[0]:HE1:SE1] takes: ff7c9dec (&(&pstats->syncp)->seq){+.?.}-{0:0}, at: sk_filter include/linux/filter.h:867 [inline] ff7c9dec (&(&pstats->syncp)->seq){+.?.}-{0:0}, at: do_one_broadcast net/netlink/af_netlink.c:1468 [inline] ff7c9dec (&(&pstats->syncp)->seq){+.?.}-{0:0}, at: netlink_broadcast_filtered+0x27c/0x4fc net/netlink/af_netlink.c:1520 {IN-SOFTIRQ-W} state was registered at: lock_acquire.part.0+0xf0/0x41c kernel/locking/lockdep.c:5510 lock_acquire+0x6c/0x74 kernel/locking/lockdep.c:5483 do_write_seqcount_begin_nested include/linux/seqlock.h:520 [inline] do_write_seqcount_begin include/linux/seqlock.h:545 [inline] u64_stats_update_begin include/linux/u64_stats_sync.h:129 [inline] bpf_prog_run_pin_on_cpu include/linux/filter.h:624 [inline] bpf_prog_run_clear_cb+0x1bc/0x270 include/linux/filter.h:755 run_filter+0xa0/0x17c net/packet/af_packet.c:2031 packet_rcv+0xc0/0x3e0 net/packet/af_packet.c:2104 dev_queue_xmit_nit+0x2bc/0x39c net/core/dev.c:2387 xmit_one net/core/dev.c:3588 [inline] dev_hard_start_xmit+0x94/0x518 net/core/dev.c:3609 sch_direct_xmit+0x11c/0x1f0 net/sched/sch_generic.c:313 qdisc_restart net/sched/sch_generic.c:376 [inline] __qdisc_run+0x194/0x7f8 net/sched/sch_generic.c:384 qdisc_run include/net/pkt_sched.h:136 [inline] qdisc_run include/net/pkt_sched.h:128 [inline] __dev_xmit_skb net/core/dev.c:3795 [inline] __dev_queue_xmit+0x65c/0xf84 net/core/dev.c:4150 dev_queue_xmit+0x14/0x18 net/core/dev.c:4215 neigh_resolve_output net/core/neighbour.c:1491 [inline] neigh_resolve_output+0x170/0x228 net/core/neighbour.c:1471 neigh_output include/net/neighbour.h:510 [inline] ip6_finish_output2+0x2e4/0x9fc net/ipv6/ip6_output.c:117 __ip6_finish_output net/ipv6/ip6_output.c:182 [inline] __ip6_finish_output+0x164/0x3f8 net/ipv6/ip6_output.c:161 ip6_finish_output+0x2c/0xb0 net/ipv6/ip6_output.c:192 NF_HOOK_COND include/linux/netfilter.h:290 [inline] ip6_output+0x74/0x294 net/ipv6/ip6_output.c:215 dst_output include/net/dst.h:448 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] NF_HOOK include/linux/netfilter.h:295 [inline] mld_sendpack+0x2a8/0x7e4 net/ipv6/mcast.c:1679 mld_send_cr net/ipv6/mcast.c:1975 [inline] mld_ifc_timer_expire+0x1e8/0x494 net/ipv6/mcast.c:2474 call_timer_fn+0xd0/0x570 kernel/time/timer.c:1431 expire_timers kernel/time/timer.c:1476 [inline] __run_timers kernel/time/timer.c:1745 [inline] run_timer_softirq+0x2e4/0x384 kernel/time/timer.c:1758 __do_softirq+0x204/0x7ac kernel/softirq.c:345 do_softirq_own_stack include/asm-generic/softirq_stack.h:10 [inline] invoke_softirq kernel/softirq.c:228 [inline] __irq_exit_rcu+0x1d8/0x200 kernel/softirq.c:422 irq_exit+0x10/0x3c kernel/softirq.c:446 __handle_domain_irq+0xb4/0x120 kernel/irq/irqdesc.c:692 handle_domain_irq include/linux/irqdesc.h:176 [inline] gic_handle_irq+0x84/0xac drivers/irqchip/irq-gic.c:370 __irq_svc+0x5c/0x94 arch/arm/kernel/entry-armv.S:205 debug_smp_processor_id+0x0/0x24 lib/smp_processor_id.c:53 rcu_read_lock_held_common kernel/rcu/update.c:108 [inline] rcu_read_lock_sched_held+0x24/0x7c kernel/rcu/update.c:123 trace_lock_acquire+0x24c/0x278 include/trace/events/lock.h:13 lock_acquire+0x3c/0x74 kernel/locking/lockdep.c:5481 rcu_lock_acquire include/linux/rcupdate.h:267 [inline] rcu_read_lock include/linux/rcupdate.h:656 [inline] avc_has_perm_noaudit+0x6c/0x260 security/selinux/avc.c:1150 selinux_inode_permission+0x140/0x220 security/selinux/hooks.c:3141 security_inode_permission+0x44/0x60 security/security.c:1268 inode_permission.part.0+0x5c/0x13c fs/namei.c:521 
inode_permission fs/namei.c:494 [inline] may_lookup fs/namei.c:1652 [inline] link_path_walk.part.0+0xd4/0x38c fs/namei.c:2208 link_path_walk fs/namei.c:2189 [inline] path_lookupat+0x3c/0x1b8 fs/namei.c:2419 filename_lookup+0xa8/0x1a4 fs/namei.c:2453 user_path_at_empty+0x74/0x90 fs/namei.c:2733 do_readlinkat+0x5c/0x12c fs/stat.c:417 __do_sys_readlink fs/stat.c:450 [inline] sys_readlink+0x24/0x28 fs/stat.c:447 ret_fast_syscall+0x0/0x2c arch/arm/mm/proc-v7.S:64 0x7eaa4974 irq event stamp: 298277 hardirqs last enabled at (298277): [<802000d0>] no_work_pending+0x4/0x34 hardirqs last disabled at (298276): [<8020c9b8>] do_work_pending+0x9c/0x648 arch/arm/kernel/signal.c:676 softirqs last enabled at (298216): [<8020167c>] __do_softirq+0x584/0x7ac kernel/softirq.c:372 softirqs last disabled at (298201): [<8024dff4>] do_softirq_own_stack include/asm-generic/softirq_stack.h:10 [inline] softirqs last disabled at (298201): [<8024dff4>] invoke_softirq kernel/softirq.c:228 [inline] softirqs last disabled at (298201): [<8024dff4>] __irq_exit_rcu+0x1d8/0x200 kernel/softirq.c:422 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&pstats->syncp)->seq); lock(&(&pstats->syncp)->seq); *** DEADLOCK *** 1 lock held by udevd/4013: #0: 82b09c5c (rcu_read_lock){....}-{1:2}, at: sk_filter_trim_cap+0x54/0x434 net/core/filter.c:139 stack backtrace: CPU: 1 PID: 4013 Comm: udevd Not tainted 5.12.0-rc3-syzkaller #0 Hardware name: ARM-Versatile Express Backtrace: [<81802550>] (dump_backtrace) from [<818027c4>] (show_stack+0x18/0x1c arch/arm/kernel/traps.c:252) r7:00000080 r6:600d0093 r5:00000000 r4:82b58344 [<818027ac>] (show_stack) from [<81809e98>] (__dump_stack lib/dump_stack.c:79 [inline]) [<818027ac>] (show_stack) from [<81809e98>] (dump_stack+0xb8/0xe8 lib/dump_stack.c:120) [<81809de0>] (dump_stack) from [<81804a00>] (print_usage_bug.part.0+0x228/0x230 kernel/locking/lockdep.c:3806) r7:86bcb768 r6:81a0326c r5:830f96a8 r4:86bcb0c0 [<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (print_usage_bug kernel/locking/lockdep.c:3776 [inline]) [<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (valid_state kernel/locking/lockdep.c:3818 [inline]) [<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (mark_lock_irq kernel/locking/lockdep.c:4021 [inline]) [<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (mark_lock.part.0+0xc34/0x136c kernel/locking/lockdep.c:4478) r10:83278fe8 r9:82c6d748 r8:00000000 r7:82c6d2d4 r6:00000004 r5:86bcb768 r4:00000006 [<802ba584>] (mark_lock.part.0) from [<802bc644>] (mark_lock kernel/locking/lockdep.c:4442 [inline]) [<802ba584>] (mark_lock.part.0) from [<802bc644>] (mark_usage kernel/locking/lockdep.c:4391 [inline]) [<802ba584>] (mark_lock.part.0) from [<802bc644>] (__lock_acquire+0x9bc/0x3318 kernel/locking/lockdep.c:4854) r10:86bcb768 r9:86bcb0c0 r8:00000001 r7:00040000 r6:0000075a r5:830f96a8 r4:00000000 [<802bbc88>] (__lock_acquire) from [<802bfb90>] (lock_acquire.part.0+0xf0/0x41c kernel/locking/lockdep.c:5510) r10:00000000 r9:600d0013 r8:00000000 r7:00000000 r6:828a2680 r5:828a2680 r4:861e5bc8 [<802bfaa0>] (lock_acquire.part.0) from [<802bff28>] (lock_acquire+0x6c/0x74 kernel/locking/lockdep.c:5483) r10:8146137c r9:00000000 r8:00000001 r7:00000000 r6:00000000 r5:00000000 r4:ff7c9dec [<802bfebc>] (lock_acquire) from [<81381eb4>] (do_write_seqcount_begin_nested include/linux/seqlock.h:520 [inline]) [<802bfebc>] (lock_acquire) from [<81381eb4>] (do_write_seqcount_begin include/linux/seqlock.h:545 [inline]) [<802bfebc>] (lock_acquire) 
from [<81381eb4>] (u64_stats_update_begin include/linux/u64_stats_sync.h:129 [inline]) [<802bfebc>] (lock_acquire) from [<81381eb4>] (__bpf_prog_run_save_cb include/linux/filter.h:727 [inline]) [<802bfebc>] (lock_acquire) from [<81381eb4>] (bpf_prog_run_save_cb include/linux/filter.h:741 [inline]) [<802bfebc>] (lock_acquire) from [<81381eb4>] (sk_filter_trim_cap+0x26c/0x434 net/core/filter.c:149) r10:a4095dd0 r9:ff7c9dd0 r8:e44be000 r7:8146137c r6:00000001 r5:8611ba80 r4:00000000 [<81381c48>] (sk_filter_trim_cap) from [<8146137c>] (sk_filter include/linux/filter.h:867 [inline]) [<81381c48>] (sk_filter_trim_cap) from [<8146137c>] (do_one_broadcast net/netlink/af_netlink.c:1468 [inline]) [<81381c48>] (sk_filter_trim_cap) from [<8146137c>] (netlink_broadcast_filtered+0x27c/0x4fc net/netlink/af_netlink.c:1520) r10:00000001 r9:833d6b1c r8:00000000 r7:8572f864 r6:8611ba80 r5:8698d800 r4:8572f800 [<81461100>] (netlink_broadcast_filtered) from [<81463e60>] (netlink_broadcast net/netlink/af_netlink.c:1544 [inline]) [<81461100>] (netlink_broadcast_filtered) from [<81463e60>] (netlink_sendmsg+0x3d0/0x478 net/netlink/af_netlink.c:1925) r10:00000000 r9:00000002 r8:8698d800 r7:000000b7 r6:8611b900 r5:861e5f50 r4:86aa3000 [<81463a90>] (netlink_sendmsg) from [<81321f54>] (sock_sendmsg_nosec net/socket.c:654 [inline]) [<81463a90>] (netlink_sendmsg) from [<81321f54>] (sock_sendmsg+0x3c/0x4c net/socket.c:674) r10:00000000 r9:861e5dd4 r8:00000000 r7:86570000 r6:00000000 r5:86570000 r4:861e5f50 [<81321f18>] (sock_sendmsg) from [<813234d0>] (____sys_sendmsg+0x230/0x29c net/socket.c:2350) r5:00000040 r4:861e5f50 [<813232a0>] (____sys_sendmsg) from [<8132549c>] (___sys_sendmsg+0xac/0xe4 net/socket.c:2404) r10:00000128 r9:861e4000 r8:00000000 r7:00000000 r6:86570000 r5:861e5f50 r4:00000000 [<813253f0>] (___sys_sendmsg) from [<81325684>] (__sys_sendmsg net/socket.c:2433 [inline]) [<813253f0>] (___sys_sendmsg) from [<81325684>] (__do_sys_sendmsg net/socket.c:2442 [inline]) [<813253f0>] (___sys_sendmsg) from [<81325684>] (sys_sendmsg+0x58/0xa0 net/socket.c:2440) r8:80200224 r7:00000128 r6:00000000 r5:7eaa541c r4:86570000 [<8132562c>] (sys_sendmsg) from [<80200060>] (ret_fast_syscall+0x0/0x2c arch/arm/mm/proc-v7.S:64) Exception stack(0x861e5fa8 to 0x861e5ff0) 5fa0: 00000000 00000000 0000000c 7eaa541c 00000000 00000000 5fc0: 00000000 00000000 76fbf840 00000128 00000000 0000008f 7eaa541c 000563f8 5fe0: 00056110 7eaa53e0 00036cec 76c9bf44 r6:76fbf840 r5:00000000 r4:00000000 Fixes: 492ecee892c2 ("bpf: enable program stats") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026214133.3114279-2-eric.dumazet@gmail.com --- include/linux/filter.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 47f80adbe7447..2fffe9cc50f94 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -612,13 +612,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, if (static_branch_unlikely(&bpf_stats_enabled_key)) { struct bpf_prog_stats *stats; u64 start = sched_clock(); + unsigned long flags; ret = dfunc(ctx, prog->insnsi, prog->bpf_func); stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); stats->cnt++; stats->nsecs += sched_clock() - start; - u64_stats_update_end(&stats->syncp); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, 
prog->bpf_func); } From d979617aa84d96acca44c2f5778892b4565e322f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2021 14:41:32 -0700 Subject: [PATCH 138/181] bpf: Fixes possible race in update_prog_stats() for 32bit arches It seems update_prog_stats() suffers from same issue fixed in the prior patch: As it can run while interrupts are enabled, it could be re-entered and the u64_stats syncp could be mangled. Fixes: fec56f5890d9 ("bpf: Introduce BPF trampoline") Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026214133.3114279-3-eric.dumazet@gmail.com --- kernel/bpf/trampoline.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 39eaaff81953d..e5963de368ed3 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -586,11 +586,13 @@ static void notrace update_prog_stats(struct bpf_prog *prog, * Hence check that 'start' is valid. */ start > NO_START_TIME) { + unsigned long flags; + stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); stats->cnt++; stats->nsecs += sched_clock() - start; - u64_stats_update_end(&stats->syncp); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } } From 61a0abaee2092eee69e44fe60336aa2f5b578938 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2021 14:41:33 -0700 Subject: [PATCH 139/181] bpf: Use u64_stats_t in struct bpf_prog_stats Commit 316580b69d0a ("u64_stats: provide u64_stats_t type") fixed possible load/store tearing on 64bit arches. For instance the following C code stats->nsecs += sched_clock() - start; Could be rightfully implemented like this by a compiler, confusing concurrent readers a lot: stats->nsecs += sched_clock(); // arbitrary delay stats->nsecs -= start; Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026214133.3114279-4-eric.dumazet@gmail.com --- include/linux/filter.h | 10 +++++----- kernel/bpf/syscall.c | 18 ++++++++++++------ kernel/bpf/trampoline.c | 6 +++--- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 2fffe9cc50f94..9782e32458522 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -553,9 +553,9 @@ struct bpf_binary_header { }; struct bpf_prog_stats { - u64 cnt; - u64 nsecs; - u64 misses; + u64_stats_t cnt; + u64_stats_t nsecs; + u64_stats_t misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -617,8 +617,8 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, ret = dfunc(ctx, prog->insnsi, prog->bpf_func); stats = this_cpu_ptr(prog->stats); flags = u64_stats_update_begin_irqsave(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5beb321b3b3b6..3e1c024ce3ed1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1804,8 +1804,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +struct bpf_prog_kstats { + u64 nsecs; + u64 cnt; + u64 misses; +}; + static void bpf_prog_get_stats(const struct bpf_prog *prog, - struct bpf_prog_stats *stats) + struct bpf_prog_kstats *stats) { u64 nsecs = 0, cnt = 0, 
misses = 0; int cpu; @@ -1818,9 +1824,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { start = u64_stats_fetch_begin_irq(&st->syncp); - tnsecs = st->nsecs; - tcnt = st->cnt; - tmisses = st->misses; + tnsecs = u64_stats_read(&st->nsecs); + tcnt = u64_stats_read(&st->cnt); + tmisses = u64_stats_read(&st->misses); } while (u64_stats_fetch_retry_irq(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; @@ -1836,7 +1842,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; bpf_prog_get_stats(prog, &stats); bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); @@ -3577,7 +3583,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_prog_info info; u32 info_len = attr->info.info_len; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; char __user *uinsns; u32 ulen; int err; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index e5963de368ed3..e98de5e73ba59 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -545,7 +545,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog) stats = this_cpu_ptr(prog->stats); u64_stats_update_begin(&stats->syncp); - stats->misses++; + u64_stats_inc(&stats->misses); u64_stats_update_end(&stats->syncp); } @@ -590,8 +590,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog, stats = this_cpu_ptr(prog->stats); flags = u64_stats_update_begin_irqsave(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); u64_stats_update_end_irqrestore(&stats->syncp, flags); } } From 547208a386fa2066fa2d6d48bda145f78c38604f Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Mon, 25 Oct 2021 15:33:42 -0700 Subject: [PATCH 140/181] selfetests/bpf: Update vmtest.sh defaults Increase memory to 4G, 8 SMP core with host cpu passthrough. This make it run faster in parallel mode and more likely to succeed. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211025223345.2136168-2-fallentree@fb.com --- tools/testing/selftests/bpf/vmtest.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 8889b3f552367..027198768fad3 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -224,10 +224,10 @@ EOF -nodefaults \ -display none \ -serial mon:stdio \ - -cpu kvm64 \ + -cpu host \ -enable-kvm \ - -smp 4 \ - -m 2G \ + -smp 8 \ + -m 4G \ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ -kernel "${kernel_bzimage}" \ -append "root=/dev/vda rw console=ttyS0,115200" From 9e7240fb2d6e0ed3944724ae1d224fdf22b3dea3 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Mon, 25 Oct 2021 15:33:44 -0700 Subject: [PATCH 141/181] selftests/bpf: Fix attach_probe in parallel mode This patch makes attach_probe uses its own method as attach point, avoiding conflict with other tests like bpf_cookie. 
Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211025223345.2136168-4-fallentree@fb.com --- tools/testing/selftests/bpf/prog_tests/attach_probe.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 6c511dcd1465d..d0bd51eb23c89 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -5,6 +5,11 @@ /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); +/* attach point */ +static void method(void) { + return ; +} + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -33,7 +38,7 @@ void test_attach_probe(void) if (CHECK(base_addr < 0, "get_base_addr", "failed to find base addr: %zd", base_addr)) return; - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); + uprobe_offset = get_uprobe_offset(&method, base_addr); ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) @@ -98,7 +103,7 @@ void test_attach_probe(void) goto cleanup; /* trigger & validate uprobe & uretprobe */ - get_base_addr(); + method(); if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", "wrong uprobe res: %d\n", skel->bss->uprobe_res)) From e1ef62a4dd0e66ede4e73791ec1bcec947d4d0b3 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Mon, 25 Oct 2021 15:33:45 -0700 Subject: [PATCH 142/181] selftests/bpf: Adding a namespace reset for tc_redirect This patch delete ns_src/ns_dst/ns_redir namespaces before recreating them, making the test more robust. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211025223345.2136168-5-fallentree@fb.com --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 53672634bc524..4b18b73df10b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -176,6 +176,18 @@ static int netns_setup_namespaces(const char *verb) return 0; } +static void netns_setup_namespaces_nofail(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns); + system(cmd); + ns++; + } +} + struct netns_setup_result { int ifindex_veth_src_fwd; int ifindex_veth_dst_fwd; @@ -762,6 +774,8 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) static void *test_tc_redirect_run_tests(void *arg) { + netns_setup_namespaces_nofail("delete"); + RUN_TEST(tc_redirect_peer); RUN_TEST(tc_redirect_peer_l3); RUN_TEST(tc_redirect_neigh); From 252c765bd764a246a8bd516fabf6d6123df4a24f Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 27 Oct 2021 11:18:22 +0000 Subject: [PATCH 143/181] riscv, bpf: Add BPF exception tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a tracing BPF program attempts to read memory without using the bpf_probe_read() helper, the verifier marks the load instruction with the BPF_PROBE_MEM flag. 
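For example, in a hypothetical fentry program (hook and field chosen only for illustration), an ordinary pointer dereference is all it takes to generate such a load:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  char LICENSE[] SEC("license") = "GPL";

  SEC("fentry/do_unlinkat")
  int BPF_PROG(trace_unlink, int dfd, struct filename *name)
  {
          /* direct load through a BTF-typed kernel pointer: the verifier
           * emits BPF_LDX | BPF_PROBE_MEM | BPF_DW for it instead of
           * requiring an explicit bpf_probe_read_kernel() call
           */
          return !!name->name;
  }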
Since the riscv JIT does not currently recognize this flag it falls back to the interpreter. Add support for BPF_PROBE_MEM, by appending an exception table to the BPF program. If the load instruction causes a data abort, the fixup infrastructure finds the exception table and fixes up the fault, by clearing the destination register and jumping over the faulting instruction. A more generic solution would add a "handler" field to the table entry, like on x86 and s390. The same issue in ARM64 is fixed in 800834285361 ("bpf, arm64: Add BPF exception tables"). Signed-off-by: Tong Tiangen Signed-off-by: Daniel Borkmann Tested-by: Pu Lehui Tested-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20211027111822.3801679-1-tongtiangen@huawei.com --- arch/riscv/mm/extable.c | 19 +++- arch/riscv/net/bpf_jit.h | 1 + arch/riscv/net/bpf_jit_comp64.c | 185 +++++++++++++++++++++++++------- arch/riscv/net/bpf_jit_core.c | 19 ++-- 4 files changed, 177 insertions(+), 47 deletions(-) diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 2fc7294221515..18bf338303b67 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -11,14 +11,23 @@ #include #include +#ifdef CONFIG_BPF_JIT +int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); +#endif + int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->epc); - if (fixup) { - regs->epc = fixup->fixup; - return 1; - } - return 0; + if (!fixup) + return 0; + +#ifdef CONFIG_BPF_JIT + if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END) + return rv_bpf_fixup_exception(fixup, regs); +#endif + + regs->epc = fixup->fixup; + return 1; } diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 75c1e99968675..f42d9cd3b64df 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -71,6 +71,7 @@ struct rv_jit_context { int ninsns; int epilogue_offset; int *offset; /* BPF to RV */ + int nexentries; unsigned long flags; int stack_size; }; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 3af4131c22c7a..2ca345c7b0bf3 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -5,6 +5,7 @@ * */ +#include #include #include #include "bpf_jit.h" @@ -27,6 +28,21 @@ static const int regmap[] = { [BPF_REG_AX] = RV_REG_T0, }; +static const int pt_regmap[] = { + [RV_REG_A0] = offsetof(struct pt_regs, a0), + [RV_REG_A1] = offsetof(struct pt_regs, a1), + [RV_REG_A2] = offsetof(struct pt_regs, a2), + [RV_REG_A3] = offsetof(struct pt_regs, a3), + [RV_REG_A4] = offsetof(struct pt_regs, a4), + [RV_REG_A5] = offsetof(struct pt_regs, a5), + [RV_REG_S1] = offsetof(struct pt_regs, s1), + [RV_REG_S2] = offsetof(struct pt_regs, s2), + [RV_REG_S3] = offsetof(struct pt_regs, s3), + [RV_REG_S4] = offsetof(struct pt_regs, s4), + [RV_REG_S5] = offsetof(struct pt_regs, s5), + [RV_REG_T0] = offsetof(struct pt_regs, t0), +}; + enum { RV_CTX_F_SEEN_TAIL_CALL = 0, RV_CTX_F_SEEN_CALL = RV_REG_RA, @@ -440,6 +456,69 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) return 0; } +#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) +#define BPF_FIXUP_REG_MASK GENMASK(31, 27) + +int rv_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + + *(unsigned long *)((void *)regs + 
pt_regmap[regs_offset]) = 0; + regs->epc = (unsigned long)&ex->fixup - offset; + + return 1; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct rv_jit_context *ctx, + int dst_reg, int insn_len) +{ + struct exception_table_entry *ex; + unsigned long pc; + off_t offset; + + if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + if (WARN_ON_ONCE(insn_len > ctx->ninsns)) + return -EINVAL; + + if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->nexentries]; + pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->insn = pc; + + /* + * Since the extable follows the program, the fixup offset is always + * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value + * to keep things simple, and put the destination register in the upper + * bits. We don't need to worry about buildtime or runtime sort + * modifying the upper bits because the table is already sorted, and + * isn't part of the main exception table. + */ + offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16)); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + return -ERANGE; + + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | + FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + ctx->nexentries++; + return 0; +} + int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, bool extra_pass) { @@ -893,52 +972,86 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* LDX: dst = *(size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - if (is_12b_int(off)) { - emit(rv_lbu(rd, off, rs), ctx); + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + { + int insn_len, insns_start; + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_lbu(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + if (insn_is_zext(&insn[1])) + return 1; break; - } + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_lhu(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit(rv_lbu(rd, 0, RV_REG_T1), ctx); - if (insn_is_zext(&insn[1])) - return 1; - break; - case BPF_LDX | BPF_MEM | BPF_H: - if (is_12b_int(off)) { - emit(rv_lhu(rd, off, rs), ctx); + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + if (insn_is_zext(&insn[1])) + return 1; break; - } + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_lwu(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - 
emit(rv_lhu(rd, 0, RV_REG_T1), ctx); - if (insn_is_zext(&insn[1])) - return 1; - break; - case BPF_LDX | BPF_MEM | BPF_W: - if (is_12b_int(off)) { - emit(rv_lwu(rd, off, rs), ctx); + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + if (insn_is_zext(&insn[1])) + return 1; break; - } + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_ld(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit(rv_lwu(rd, 0, RV_REG_T1), ctx); - if (insn_is_zext(&insn[1])) - return 1; - break; - case BPF_LDX | BPF_MEM | BPF_DW: - if (is_12b_int(off)) { - emit_ld(rd, off, rs, ctx); + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit_ld(rd, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; break; } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit_ld(rd, 0, RV_REG_T1, ctx); + ret = add_exception_handler(insn, ctx, rd, insn_len); + if (ret) + return ret; break; - + } /* speculation barrier */ case BPF_ST | BPF_NOSPEC: break; diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index fed86f42dfbe5..7ccc809f2c192 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -41,12 +41,12 @@ bool bpf_jit_needs_zext(void) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + unsigned int prog_size = 0, extable_size = 0; bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; - unsigned int image_size = 0; if (!prog->jit_requested) return orig_prog; @@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (ctx->offset) { extra_pass = true; - image_size = sizeof(*ctx->insns) * ctx->ninsns; + prog_size = sizeof(*ctx->insns) * ctx->ninsns; goto skip_init_ctx; } @@ -102,10 +102,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (ctx->ninsns == prev_ninsns) { if (jit_data->header) break; + /* obtain the actual image size */ + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + prog_size = sizeof(*ctx->insns) * ctx->ninsns; - image_size = sizeof(*ctx->insns) * ctx->ninsns; jit_data->header = - bpf_jit_binary_alloc(image_size, + bpf_jit_binary_alloc(prog_size + extable_size, &jit_data->image, sizeof(u32), bpf_fill_ill_insns); @@ -130,9 +133,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_offset; } + if (extable_size) + prog->aux->extable = (void *)ctx->insns + prog_size; + skip_init_ctx: pass++; ctx->ninsns = 0; + ctx->nexentries = 0; bpf_jit_build_prologue(ctx); if (build_body(ctx, extra_pass, NULL)) { @@ -143,11 +150,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_jit_build_epilogue(ctx); if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, pass, ctx->insns); + bpf_jit_dump(prog->len, prog_size, pass, ctx->insns); prog->bpf_func = (void *)ctx->insns; prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); From b066abba3ef16a4a085d237e95da0de3f0b87713 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 28 Oct 2021 09:25:21 +0800 Subject: [PATCH 144/181] bpf, tests: Add module 
parameter test_suite to test_bpf module After commit 9298e63eafea ("bpf/tests: Add exhaustive tests of ALU operand magnitudes"), when modprobe test_bpf.ko with JIT on mips64, there exists segment fault due to the following reason: [...] ALU64_MOV_X: all register value magnitudes jited:1 Break instruction in kernel code[#1] [...] It seems that the related JIT implementations of some test cases in test_bpf() have problems. At this moment, I do not care about the segment fault while I just want to verify the test cases of tail calls. Based on the above background and motivation, add the following module parameter test_suite to the test_bpf.ko: test_suite=: only the specified test suite will be run, the string can be "test_bpf", "test_tail_calls" or "test_skb_segment". If test_suite is not specified, but test_id, test_name or test_range is specified, set 'test_bpf' as the default test suite. This is useful to only test the corresponding test suite when specifying the valid test_suite string. Any invalid test suite will result in -EINVAL being returned and no tests being run. If the test_suite is not specified or specified as empty string, it does not change the current logic, all of the test cases will be run. Here are some test results: # dmesg -c # modprobe test_bpf # dmesg | grep Summary test_bpf: Summary: 1009 PASSED, 0 FAILED, [0/997 JIT'ed] test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed] test_bpf: test_skb_segment: Summary: 2 PASSED, 0 FAILED # rmmod test_bpf # dmesg -c # modprobe test_bpf test_suite=test_bpf # dmesg | tail -1 test_bpf: Summary: 1009 PASSED, 0 FAILED, [0/997 JIT'ed] # rmmod test_bpf # dmesg -c # modprobe test_bpf test_suite=test_tail_calls # dmesg test_bpf: #0 Tail call leaf jited:0 21 PASS [...] test_bpf: #7 Tail call error path, index out of range jited:0 32 PASS test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed] # rmmod test_bpf # dmesg -c # modprobe test_bpf test_suite=test_skb_segment # dmesg test_bpf: #0 gso_with_rx_frags PASS test_bpf: #1 gso_linear_no_head_frag PASS test_bpf: test_skb_segment: Summary: 2 PASSED, 0 FAILED # rmmod test_bpf # dmesg -c # modprobe test_bpf test_id=1 # dmesg test_bpf: test_bpf: set 'test_bpf' as the default test_suite. test_bpf: #1 TXA jited:0 54 51 50 PASS test_bpf: Summary: 1 PASSED, 0 FAILED, [0/1 JIT'ed] # rmmod test_bpf # dmesg -c # modprobe test_bpf test_suite=test_bpf test_name=TXA # dmesg test_bpf: #1 TXA jited:0 54 50 51 PASS test_bpf: Summary: 1 PASSED, 0 FAILED, [0/1 JIT'ed] # rmmod test_bpf # dmesg -c # modprobe test_bpf test_suite=test_tail_calls test_range=6,7 # dmesg test_bpf: #6 Tail call error path, NULL target jited:0 41 PASS test_bpf: #7 Tail call error path, index out of range jited:0 32 PASS test_bpf: test_tail_calls: Summary: 2 PASSED, 0 FAILED, [0/2 JIT'ed] # rmmod test_bpf # dmesg -c # modprobe test_bpf test_suite=test_skb_segment test_id=1 # dmesg test_bpf: #1 gso_linear_no_head_frag PASS test_bpf: test_skb_segment: Summary: 1 PASSED, 0 FAILED By the way, the above segment fault has been fixed in the latest bpf-next tree which contains the mips64 JIT rework. 
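The selection logic above can be condensed into a small sketch (illustrative only, not the actual test_bpf.c code; names such as suite_demo are made up): an explicit test_suite must name a known suite, an empty test_suite combined with a test selector falls back to "test_bpf", and an empty test_suite with no selectors runs everything.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>

static char test_suite[32];
module_param_string(test_suite, test_suite, sizeof(test_suite), 0);

static int test_id = -1;
module_param(test_id, int, 0);

static int __init suite_demo_init(void)
{
	/* Reject anything that is not a known suite name. */
	if (strlen(test_suite) &&
	    strcmp(test_suite, "test_bpf") &&
	    strcmp(test_suite, "test_tail_calls") &&
	    strcmp(test_suite, "test_skb_segment"))
		return -EINVAL;

	/* No suite given but a test selector is: default to "test_bpf". */
	if (!strlen(test_suite) && test_id != -1)
		strscpy(test_suite, "test_bpf", sizeof(test_suite));

	pr_info("suite_demo: running '%s'\n",
		strlen(test_suite) ? test_suite : "all suites");
	return 0;
}
module_init(suite_demo_init);

static void __exit suite_demo_exit(void) { }
module_exit(suite_demo_exit);

MODULE_LICENSE("GPL");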
Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Tested-by: Johan Almbladh Acked-by: Johan Almbladh Link: https://lore.kernel.org/bpf/1635384321-28128-1-git-send-email-yangtiezhu@loongson.cn --- lib/test_bpf.c | 212 +++++++++++++++++++++++++++++++------------------ 1 file changed, 135 insertions(+), 77 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index e5b10fdefab54..adae39567264f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14316,72 +14316,9 @@ module_param_string(test_name, test_name, sizeof(test_name), 0); static int test_id = -1; module_param(test_id, int, 0); -static int test_range[2] = { 0, ARRAY_SIZE(tests) - 1 }; +static int test_range[2] = { 0, INT_MAX }; module_param_array(test_range, int, NULL, 0); -static __init int find_test_index(const char *test_name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(tests); i++) { - if (!strcmp(tests[i].descr, test_name)) - return i; - } - return -1; -} - -static __init int prepare_bpf_tests(void) -{ - if (test_id >= 0) { - /* - * if a test_id was specified, use test_range to - * cover only that test. - */ - if (test_id >= ARRAY_SIZE(tests)) { - pr_err("test_bpf: invalid test_id specified.\n"); - return -EINVAL; - } - - test_range[0] = test_id; - test_range[1] = test_id; - } else if (*test_name) { - /* - * if a test_name was specified, find it and setup - * test_range to cover only that test. - */ - int idx = find_test_index(test_name); - - if (idx < 0) { - pr_err("test_bpf: no test named '%s' found.\n", - test_name); - return -EINVAL; - } - test_range[0] = idx; - test_range[1] = idx; - } else { - /* - * check that the supplied test_range is valid. - */ - if (test_range[0] >= ARRAY_SIZE(tests) || - test_range[1] >= ARRAY_SIZE(tests) || - test_range[0] < 0 || test_range[1] < 0) { - pr_err("test_bpf: test_range is out of bound.\n"); - return -EINVAL; - } - - if (test_range[1] < test_range[0]) { - pr_err("test_bpf: test_range is ending before it starts.\n"); - return -EINVAL; - } - } - - return 0; -} - -static __init void destroy_bpf_tests(void) -{ -} - static bool exclude_test(int test_id) { return test_id < test_range[0] || test_id > test_range[1]; @@ -14553,6 +14490,10 @@ static __init int test_skb_segment(void) for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) { const struct skb_segment_test *test = &skb_segment_tests[i]; + cond_resched(); + if (exclude_test(i)) + continue; + pr_info("#%d %s ", i, test->descr); if (test_skb_segment_single(test)) { @@ -14934,6 +14875,8 @@ static __init int test_tail_calls(struct bpf_array *progs) int ret; cond_resched(); + if (exclude_test(i)) + continue; pr_info("#%d %s ", i, test->descr); if (!fp) { @@ -14966,29 +14909,144 @@ static __init int test_tail_calls(struct bpf_array *progs) return err_cnt ? 
-EINVAL : 0; } +static char test_suite[32]; +module_param_string(test_suite, test_suite, sizeof(test_suite), 0); + +static __init int find_test_index(const char *test_name) +{ + int i; + + if (!strcmp(test_suite, "test_bpf")) { + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!strcmp(tests[i].descr, test_name)) + return i; + } + } + + if (!strcmp(test_suite, "test_tail_calls")) { + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) { + if (!strcmp(tail_call_tests[i].descr, test_name)) + return i; + } + } + + if (!strcmp(test_suite, "test_skb_segment")) { + for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) { + if (!strcmp(skb_segment_tests[i].descr, test_name)) + return i; + } + } + + return -1; +} + +static __init int prepare_test_range(void) +{ + int valid_range; + + if (!strcmp(test_suite, "test_bpf")) + valid_range = ARRAY_SIZE(tests); + else if (!strcmp(test_suite, "test_tail_calls")) + valid_range = ARRAY_SIZE(tail_call_tests); + else if (!strcmp(test_suite, "test_skb_segment")) + valid_range = ARRAY_SIZE(skb_segment_tests); + else + return 0; + + if (test_id >= 0) { + /* + * if a test_id was specified, use test_range to + * cover only that test. + */ + if (test_id >= valid_range) { + pr_err("test_bpf: invalid test_id specified for '%s' suite.\n", + test_suite); + return -EINVAL; + } + + test_range[0] = test_id; + test_range[1] = test_id; + } else if (*test_name) { + /* + * if a test_name was specified, find it and setup + * test_range to cover only that test. + */ + int idx = find_test_index(test_name); + + if (idx < 0) { + pr_err("test_bpf: no test named '%s' found for '%s' suite.\n", + test_name, test_suite); + return -EINVAL; + } + test_range[0] = idx; + test_range[1] = idx; + } else if (test_range[0] != 0 || test_range[1] != INT_MAX) { + /* + * check that the supplied test_range is valid. + */ + if (test_range[0] < 0 || test_range[1] >= valid_range) { + pr_err("test_bpf: test_range is out of bound for '%s' suite.\n", + test_suite); + return -EINVAL; + } + + if (test_range[1] < test_range[0]) { + pr_err("test_bpf: test_range is ending before it starts.\n"); + return -EINVAL; + } + } + + return 0; +} + static int __init test_bpf_init(void) { struct bpf_array *progs = NULL; int ret; - ret = prepare_bpf_tests(); + if (strlen(test_suite) && + strcmp(test_suite, "test_bpf") && + strcmp(test_suite, "test_tail_calls") && + strcmp(test_suite, "test_skb_segment")) { + pr_err("test_bpf: invalid test_suite '%s' specified.\n", test_suite); + return -EINVAL; + } + + /* + * if test_suite is not specified, but test_id, test_name or test_range + * is specified, set 'test_bpf' as the default test suite. 
+ */ + if (!strlen(test_suite) && + (test_id != -1 || strlen(test_name) || + (test_range[0] != 0 || test_range[1] != INT_MAX))) { + pr_info("test_bpf: set 'test_bpf' as the default test_suite.\n"); + strscpy(test_suite, "test_bpf", sizeof(test_suite)); + } + + ret = prepare_test_range(); if (ret < 0) return ret; - ret = test_bpf(); - destroy_bpf_tests(); - if (ret) - return ret; + if (!strlen(test_suite) || !strcmp(test_suite, "test_bpf")) { + ret = test_bpf(); + if (ret) + return ret; + } - ret = prepare_tail_call_tests(&progs); - if (ret) - return ret; - ret = test_tail_calls(progs); - destroy_tail_call_tests(progs); - if (ret) - return ret; + if (!strlen(test_suite) || !strcmp(test_suite, "test_tail_calls")) { + ret = prepare_tail_call_tests(&progs); + if (ret) + return ret; + ret = test_tail_calls(progs); + destroy_tail_call_tests(progs); + if (ret) + return ret; + } - return test_skb_segment(); + if (!strlen(test_suite) || !strcmp(test_suite, "test_skb_segment")) + return test_skb_segment(); + + return 0; } static void __exit test_bpf_exit(void) From 9330986c03006ab1d33d243b7cfe598a7a3c1baa Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 27 Oct 2021 16:45:00 -0700 Subject: [PATCH 145/181] bpf: Add bloom filter map implementation This patch adds the kernel-side changes for the implementation of a bpf bloom filter map. The bloom filter map supports peek (determining whether an element is present in the map) and push (adding an element to the map) operations.These operations are exposed to userspace applications through the already existing syscalls in the following way: BPF_MAP_LOOKUP_ELEM -> peek BPF_MAP_UPDATE_ELEM -> push The bloom filter map does not have keys, only values. In light of this, the bloom filter map's API matches that of queue stack maps: user applications use BPF_MAP_LOOKUP_ELEM/BPF_MAP_UPDATE_ELEM which correspond internally to bpf_map_peek_elem/bpf_map_push_elem, and bpf programs must use the bpf_map_peek_elem and bpf_map_push_elem APIs to query or add an element to the bloom filter map. When the bloom filter map is created, it must be created with a key_size of 0. For updates, the user will pass in the element to add to the map as the value, with a NULL key. For lookups, the user will pass in the element to query in the map as the value, with a NULL key. In the verifier layer, this requires us to modify the argument type of a bloom filter's BPF_FUNC_map_peek_elem call to ARG_PTR_TO_MAP_VALUE; as well, in the syscall layer, we need to copy over the user value so that in bpf_map_peek_elem, we know which specific value to query. A few things to please take note of: * If there are any concurrent lookups + updates, the user is responsible for synchronizing this to ensure no false negative lookups occur. * The number of hashes to use for the bloom filter is configurable from userspace. If no number is specified, the default used will be 5 hash functions. The benchmarks later in this patchset can help compare the performance of using different number of hashes on different entry sizes. In general, using more hashes decreases both the false positive rate and the speed of a lookup. * Deleting an element in the bloom filter map is not supported. * The bloom filter map may be used as an inner map. * The "max_entries" size that is specified at map creation time is used to approximate a reasonable bitmap size for the bloom filter, and is not otherwise strictly enforced. 
If the user wishes to insert more entries into the bloom filter than "max_entries", they may do so but they should be aware that this may lead to a higher false positive rate. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211027234504.30744-2-joannekoong@fb.com --- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 9 ++ kernel/bpf/Makefile | 2 +- kernel/bpf/bloom_filter.c | 195 +++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 24 +++- kernel/bpf/verifier.c | 19 +++- tools/include/uapi/linux/bpf.h | 9 ++ 8 files changed, 253 insertions(+), 7 deletions(-) create mode 100644 kernel/bpf/bloom_filter.c diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 31421c74ba081..50105e0b8fcc0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -169,6 +169,7 @@ struct bpf_map { u32 value_size; u32 max_entries; u32 map_flags; + u64 map_extra; /* any per-map-type extra fields */ int spin_lock_off; /* >=0 valid offset, <0 error */ int timer_off; /* >=0 valid offset, <0 error */ u32 id; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9c81724e4b985..c4424ac2fa029 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c108200378834..8bead4aa3ad04 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). 
+ */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -5638,6 +5646,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7f33098ca63fb..cf6ca339f3cd8 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,7 +7,7 @@ endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c new file mode 100644 index 0000000000000..7c50232b7571f --- /dev/null +++ b/kernel/bpf/bloom_filter.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include +#include +#include +#include +#include + +#define BLOOM_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) + +struct bpf_bloom_filter { + struct bpf_map map; + u32 bitset_mask; + u32 hash_seed; + /* If the size of the values in the bloom filter is u32 aligned, + * then it is more performant to use jhash2 as the underlying hash + * function, else we use jhash. This tracks the number of u32s + * in an u32-aligned value size. If the value size is not u32 aligned, + * this will be 0. 
+ */ + u32 aligned_u32_count; + u32 nr_hash_funcs; + unsigned long bitset[]; +}; + +static u32 hash(struct bpf_bloom_filter *bloom, void *value, + u32 value_size, u32 index) +{ + u32 h; + + if (bloom->aligned_u32_count) + h = jhash2(value, bloom->aligned_u32_count, + bloom->hash_seed + index); + else + h = jhash(value, value_size, bloom->hash_seed + index); + + return h & bloom->bitset_mask; +} + +static int peek_elem(struct bpf_map *map, void *value) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + if (!test_bit(h, bloom->bitset)) + return -ENOENT; + } + + return 0; +} + +static int push_elem(struct bpf_map *map, void *value, u64 flags) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + if (flags != BPF_ANY) + return -EINVAL; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + set_bit(h, bloom->bitset); + } + + return 0; +} + +static int pop_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static struct bpf_map *map_alloc(union bpf_attr *attr) +{ + u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_bloom_filter *bloom; + + if (!bpf_capable()) + return ERR_PTR(-EPERM); + + if (attr->key_size != 0 || attr->value_size == 0 || + attr->max_entries == 0 || + attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags) || + (attr->map_extra & ~0xF)) + return ERR_PTR(-EINVAL); + + /* The lower 4 bits of map_extra specify the number of hash functions */ + nr_hash_funcs = attr->map_extra & 0xF; + if (nr_hash_funcs == 0) + /* Default to using 5 hash functions if unspecified */ + nr_hash_funcs = 5; + + /* For the bloom filter, the optimal bit array size that minimizes the + * false positive probability is n * k / ln(2) where n is the number of + * expected entries in the bloom filter and k is the number of hash + * functions. We use 7 / 5 to approximate 1 / ln(2). + * + * We round this up to the nearest power of two to enable more efficient + * hashing using bitmasks. The bitmask will be the bit array size - 1. + * + * If this overflows a u32, the bit array size will have 2^32 (4 + * GB) bits. 
+ */ + if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) || + check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) || + nr_bits > (1UL << 31)) { + /* The bit array size is 2^32 bits but to avoid overflowing the + * u32, we use U32_MAX, which will round up to the equivalent + * number of bytes + */ + bitset_bytes = BITS_TO_BYTES(U32_MAX); + bitset_mask = U32_MAX; + } else { + if (nr_bits <= BITS_PER_LONG) + nr_bits = BITS_PER_LONG; + else + nr_bits = roundup_pow_of_two(nr_bits); + bitset_bytes = BITS_TO_BYTES(nr_bits); + bitset_mask = nr_bits - 1; + } + + bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); + bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node); + + if (!bloom) + return ERR_PTR(-ENOMEM); + + bpf_map_init_from_attr(&bloom->map, attr); + + bloom->nr_hash_funcs = nr_hash_funcs; + bloom->bitset_mask = bitset_mask; + + /* Check whether the value size is u32-aligned */ + if ((attr->value_size & (sizeof(u32) - 1)) == 0) + bloom->aligned_u32_count = + attr->value_size / sizeof(u32); + + if (!(attr->map_flags & BPF_F_ZERO_SEED)) + bloom->hash_seed = get_random_int(); + + return &bloom->map; +} + +static void map_free(struct bpf_map *map) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + + bpf_map_area_free(bloom); +} + +static void *lookup_elem(struct bpf_map *map, void *key) +{ + /* The eBPF program should use map_peek_elem instead */ + return ERR_PTR(-EINVAL); +} + +static int update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + /* The eBPF program should use map_push_elem instead */ + return -EINVAL; +} + +static int check_btf(const struct bpf_map *map, const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + /* Bloom filter maps are keyless */ + return btf_type_is_void(key_type) ? 
0 : -EINVAL; +} + +static int bpf_bloom_btf_id; +const struct bpf_map_ops bloom_filter_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc = map_alloc, + .map_free = map_free, + .map_push_elem = push_elem, + .map_peek_elem = peek_elem, + .map_pop_elem = pop_elem, + .map_lookup_elem = lookup_elem, + .map_update_elem = update_elem, + .map_check_btf = check_btf, + .map_btf_name = "bpf_bloom_filter", + .map_btf_id = &bpf_bloom_btf_id, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3e1c024ce3ed1..f7c2c6354add9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -199,7 +199,8 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, err = bpf_fd_reuseport_array_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_push_elem(map, value, flags); } else { rcu_read_lock(); @@ -238,7 +239,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { err = bpf_fd_reuseport_array_lookup_elem(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_peek_elem(map, value); } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { /* struct_ops map requires directly updating "value" */ @@ -348,6 +350,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->max_entries = attr->max_entries; map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); + map->map_extra = attr->map_extra; } static int bpf_map_alloc_id(struct bpf_map *map) @@ -553,6 +556,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" + "map_extra:\t%#llx\n" "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", @@ -561,6 +565,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, + (unsigned long long)map->map_extra, bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); @@ -810,7 +815,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return ret; } -#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -831,6 +836,10 @@ static int map_create(union bpf_attr *attr) return -EINVAL; } + if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && + attr->map_extra != 0) + return -EINVAL; + f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -1080,6 +1089,14 @@ static int map_lookup_elem(union bpf_attr *attr) if (!value) goto free_key; + if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { + if (copy_from_user(value, uvalue, value_size)) + err = -EFAULT; + else + err = bpf_map_copy_value(map, key, value, attr->flags); + goto free_value; + } + err = bpf_map_copy_value(map, key, value, attr->flags); if (err) goto free_value; @@ -3881,6 +3898,7 @@ static int bpf_map_get_info_by_fd(struct file *file, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; + info.map_extra = map->map_extra; memcpy(info.name, map->name, 
sizeof(map->name)); if (map->btf) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c6616e3258038..3c8aa7df17734 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5002,7 +5002,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, return -EINVAL; } break; - + case BPF_MAP_TYPE_BLOOM_FILTER: + if (meta->func_id == BPF_FUNC_map_peek_elem) + *arg_type = ARG_PTR_TO_MAP_VALUE; + break; default: break; } @@ -5577,6 +5580,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_BLOOM_FILTER: + if (func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_map_push_elem) + goto error; + break; default: break; } @@ -5644,13 +5652,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; - case BPF_FUNC_map_peek_elem: case BPF_FUNC_map_pop_elem: - case BPF_FUNC_map_push_elem: if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_push_elem: + if (map->map_type != BPF_MAP_TYPE_QUEUE && + map->map_type != BPF_MAP_TYPE_STACK && + map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c108200378834..8bead4aa3ad04 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -5638,6 +5646,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { From 47512102cde2d252d7b984d9675cfd3420b48ad9 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 27 Oct 2021 16:45:01 -0700 Subject: [PATCH 146/181] libbpf: Add "map_extra" as a per-map-type extra flag This patch adds the libbpf infrastructure for supporting a per-map-type "map_extra" field, whose definition will be idiosyncratic depending on map type. For example, for the bloom filter map, the lower 4 bits of map_extra is used to denote the number of hash functions. Please note that until libbpf 1.0 is here, the "bpf_create_map_params" struct is used as a temporary means for propagating the map_extra field to the kernel. 
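As a usage illustration (not part of the patch), here is a hedged userspace sketch: it assumes an object file named bloom_filter_map.bpf.o that defines a BPF_MAP_TYPE_BLOOM_FILTER map called map_bloom (mirroring the selftests added later in this series), sets the number of hash functions through map_extra before load, and then pushes and queries one value through the existing syscall wrappers with a NULL key.

#include <stdio.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_map *map;
	__u32 val = 42;
	int fd, err;

	obj = bpf_object__open("bloom_filter_map.bpf.o");	/* assumed file name */
	if (libbpf_get_error(obj))
		return 1;

	map = bpf_object__find_map_by_name(obj, "map_bloom");	/* assumed map name */
	if (!map)
		return 1;

	/* Lower 4 bits of map_extra = number of hash functions (0 means 5) */
	err = bpf_map__set_map_extra(map, 3);
	if (err || bpf_object__load(obj))
		return 1;

	fd = bpf_map__fd(map);

	/* Bloom filter maps are keyless: push and peek both take a NULL key */
	if (bpf_map_update_elem(fd, NULL, &val, BPF_ANY))
		return 1;
	err = bpf_map_lookup_elem(fd, NULL, &val);
	printf("value %u is %s\n", val,
	       err ? "definitely absent" : "possibly present");

	bpf_object__close(obj);
	return 0;
}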
Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211027234504.30744-3-joannekoong@fb.com --- tools/lib/bpf/bpf.c | 27 ++++++++++++++++++++++- tools/lib/bpf/bpf_gen_internal.h | 2 +- tools/lib/bpf/gen_loader.c | 3 ++- tools/lib/bpf/libbpf.c | 38 +++++++++++++++++++++++++++----- tools/lib/bpf/libbpf.h | 3 +++ tools/lib/bpf/libbpf.map | 2 ++ tools/lib/bpf/libbpf_internal.h | 25 ++++++++++++++++++++- 7 files changed, 91 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 7d1741ceaa329..fe4b6ebc9b8fc 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -77,7 +77,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) return fd; } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) { union bpf_attr attr; int fd; @@ -102,11 +102,36 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) create_attr->btf_vmlinux_value_type_id; else attr.inner_map_fd = create_attr->inner_map_fd; + attr.map_extra = create_attr->map_extra; fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +{ + struct bpf_create_map_params p = {}; + + p.map_type = create_attr->map_type; + p.key_size = create_attr->key_size; + p.value_size = create_attr->value_size; + p.max_entries = create_attr->max_entries; + p.map_flags = create_attr->map_flags; + p.name = create_attr->name; + p.numa_node = create_attr->numa_node; + p.btf_fd = create_attr->btf_fd; + p.btf_key_type_id = create_attr->btf_key_type_id; + p.btf_value_type_id = create_attr->btf_value_type_id; + p.map_ifindex = create_attr->map_ifindex; + if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) + p.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + p.inner_map_fd = create_attr->inner_map_fd; + + return libbpf__bpf_create_map_xattr(&p); +} + int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 70eccbffefb19..b8d41d6fbc407 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -43,7 +43,7 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level); int bpf_gen__finish(struct bpf_gen *gen); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); struct bpf_prog_load_params; void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 937bfc7db41e7..e552484ae4a48 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -431,7 +431,7 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - struct bpf_create_map_attr *map_attr, int map_idx) + struct bpf_create_map_params *map_attr, int map_idx) { int attr_size = offsetofend(union 
bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; @@ -443,6 +443,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, attr.key_size = map_attr->key_size; attr.value_size = map_attr->value_size; attr.map_flags = map_attr->map_flags; + attr.map_extra = map_attr->map_extra; memcpy(attr.map_name, map_attr->name, min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = map_attr->numa_node; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 59d39ce9f375d..232e4efe82e90 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -400,6 +400,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; + __u64 map_extra; }; enum extern_type { @@ -2324,6 +2325,13 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, } map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; + } else if (strcmp(name, "map_extra") == 0) { + __u32 map_extra; + + if (!get_map_field_int(map_name, btf, m, &map_extra)) + return -EINVAL; + map_def->map_extra = map_extra; + map_def->parts |= MAP_DEF_MAP_EXTRA; } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", map_name, name); @@ -2348,6 +2356,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->def.value_size = def->value_size; map->def.max_entries = def->max_entries; map->def.map_flags = def->map_flags; + map->map_extra = def->map_extra; map->numa_node = def->numa_node; map->btf_key_type_id = def->key_type_id; @@ -2371,7 +2380,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def if (def->parts & MAP_DEF_MAX_ENTRIES) pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); if (def->parts & MAP_DEF_MAP_FLAGS) - pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags); + pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); + if (def->parts & MAP_DEF_MAP_EXTRA) + pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, + (unsigned long long)def->map_extra); if (def->parts & MAP_DEF_PINNING) pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); if (def->parts & MAP_DEF_NUMA_NODE) @@ -4210,6 +4222,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; map->reused = true; + map->map_extra = info.map_extra; return 0; @@ -4724,7 +4737,8 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && map_info.max_entries == map->def.max_entries && - map_info.map_flags == map->def.map_flags); + map_info.map_flags == map->def.map_flags && + map_info.map_extra == map->map_extra); } static int @@ -4807,7 +4821,7 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - struct bpf_create_map_attr create_attr; + struct bpf_create_map_params create_attr; struct bpf_map_def *def = &map->def; int err = 0; @@ -4821,6 +4835,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; + create_attr.map_extra = map->map_extra; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4895,7 +4910,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b */ map->fd = 0; } 
else { - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4910,7 +4925,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } err = map->fd < 0 ? -errno : 0; @@ -8880,6 +8895,19 @@ int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) return 0; } +__u64 bpf_map__map_extra(const struct bpf_map *map) +{ + return map->map_extra; +} + +int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) +{ + if (map->fd >= 0) + return libbpf_err(-EBUSY); + map->map_extra = map_extra; + return 0; +} + __u32 bpf_map__numa_node(const struct bpf_map *map) { return map->numa_node; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index defabdbe7760f..9de0f299706b1 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -600,6 +600,9 @@ LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); /* get/set map if_index */ LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +/* get/set map map_extra flags */ +LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 15239c05659cd..43580eb477405 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -389,6 +389,8 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: + bpf_map__map_extra; + bpf_map__set_map_extra; bpf_object__next_map; bpf_object__next_program; bpf_object__prev_map; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 13bc7950e3040..a6dd7e25747f3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -193,8 +193,9 @@ enum map_def_parts { MAP_DEF_NUMA_NODE = 0x080, MAP_DEF_PINNING = 0x100, MAP_DEF_INNER_MAP = 0x200, + MAP_DEF_MAP_EXTRA = 0x400, - MAP_DEF_ALL = 0x3ff, /* combination of all above */ + MAP_DEF_ALL = 0x7ff, /* combination of all above */ }; struct btf_map_def { @@ -208,6 +209,7 @@ struct btf_map_def { __u32 map_flags; __u32 numa_node; __u32 pinning; + __u64 map_extra; }; int parse_btf_map_def(const char *map_name, struct btf *btf, @@ -303,6 +305,27 @@ struct bpf_prog_load_params { int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); +struct bpf_create_map_params { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; + __u64 map_extra; +}; + +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); + struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, const char **prefix, int *kind); From ed9109ad643cfbe69670a37cdbaf2da9f409fed0 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 27 Oct 2021 16:45:02 -0700 Subject: [PATCH 
147/181] selftests/bpf: Add bloom filter map test cases This patch adds test cases for bpf bloom filter maps. They include tests checking against invalid operations by userspace, tests for using the bloom filter map as an inner map, and a bpf program that queries the bloom filter map for values added by a userspace program. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211027234504.30744-4-joannekoong@fb.com --- .../bpf/prog_tests/bloom_filter_map.c | 204 ++++++++++++++++++ .../selftests/bpf/progs/bloom_filter_map.c | 82 +++++++ 2 files changed, 286 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_map.c diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c new file mode 100644 index 0000000000000..9aa3fbed918be --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include +#include "bloom_filter_map.skel.h" + +static void test_fail_cases(void) +{ + struct bpf_create_map_attr xattr = { + .name = "bloom_filter_map", + .map_type = BPF_MAP_TYPE_BLOOM_FILTER, + .max_entries = 100, + .value_size = 11, + }; + __u32 value; + int fd, err; + + /* Invalid key size */ + xattr.key_size = 4; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) + close(fd); + xattr.key_size = 0; + + /* Invalid value size */ + xattr.value_size = 0; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) + close(fd); + xattr.value_size = 11; + + /* Invalid max entries size */ + xattr.max_entries = 0; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) + close(fd); + xattr.max_entries = 100; + + /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ + xattr.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) + close(fd); + xattr.map_flags = 0; + + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) + return; + + /* Test invalid flags */ + err = bpf_map_update_elem(fd, NULL, &value, -1); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, 10000); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + close(fd); +} + +static void check_bloom(struct bloom_filter_map *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.check_bloom); + if (!ASSERT_OK_PTR(link, "link")) + return; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); +} + +static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals, + __u32 nr_rand_vals) +{ + int outer_map_fd, 
inner_map_fd, err, i, key = 0; + struct bpf_create_map_attr xattr = { + .name = "bloom_filter_inner_map", + .map_type = BPF_MAP_TYPE_BLOOM_FILTER, + .value_size = sizeof(__u32), + .max_entries = nr_rand_vals, + }; + struct bpf_link *link; + + /* Create a bloom filter map that will be used as the inner map */ + inner_map_fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map")) + return; + + for (i = 0; i < nr_rand_vals; i++) { + err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to inner_map_fd")) + goto done; + } + + /* Add the bloom filter map to the outer map */ + outer_map_fd = bpf_map__fd(skel->maps.outer_map); + err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY); + if (!ASSERT_OK(err, "Add bloom filter map to outer map")) + goto done; + + /* Attach the bloom_filter_inner_map prog */ + link = bpf_program__attach(skel->progs.inner_map); + if (!ASSERT_OK_PTR(link, "link")) + goto delete_inner_map; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); + +delete_inner_map: + /* Ensure the inner bloom filter map can be deleted */ + err = bpf_map_delete_elem(outer_map_fd, &key); + ASSERT_OK(err, "Delete inner bloom filter map"); + +done: + close(inner_map_fd); +} + +static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals, + __u32 *out_nr_rand_vals) +{ + struct bloom_filter_map *skel; + int random_data_fd, bloom_fd; + __u32 *rand_vals = NULL; + __u32 map_size, val; + int err, i; + + /* Set up a bloom filter map skeleton */ + skel = bloom_filter_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load")) + return -EINVAL; + + /* Set up rand_vals */ + map_size = bpf_map__max_entries(skel->maps.map_random_data); + rand_vals = malloc(sizeof(*rand_vals) * map_size); + if (!rand_vals) { + err = -ENOMEM; + goto error; + } + + /* Generate random values and populate both skeletons */ + random_data_fd = bpf_map__fd(skel->maps.map_random_data); + bloom_fd = bpf_map__fd(skel->maps.map_bloom); + for (i = 0; i < map_size; i++) { + val = rand(); + + err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_random_data")) + goto error; + + err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_bloom")) + goto error; + + rand_vals[i] = val; + } + + *out_skel = skel; + *out_rand_vals = rand_vals; + *out_nr_rand_vals = map_size; + + return 0; + +error: + bloom_filter_map__destroy(skel); + if (rand_vals) + free(rand_vals); + return err; +} + +void test_bloom_filter_map(void) +{ + __u32 *rand_vals, nr_rand_vals; + struct bloom_filter_map *skel; + int err; + + test_fail_cases(); + + err = setup_progs(&skel, &rand_vals, &nr_rand_vals); + if (err) + return; + + test_inner_map(skel, rand_vals, nr_rand_vals); + free(rand_vals); + + check_bloom(skel); + + bloom_filter_map__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c new file mode 100644 index 0000000000000..1316f3db79d99 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, 
__u32); + __uint(max_entries, 1000); +} map_random_data SEC(".maps"); + +struct map_bloom_type { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + __type(value, __u32); + __uint(max_entries, 10000); + __uint(map_extra, 5); +} map_bloom SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct map_bloom_type); +} outer_map SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; +}; + +int error = 0; + +static __u64 +check_elem(struct bpf_map *map, __u32 *key, __u32 *val, + struct callback_ctx *data) +{ + int err; + + err = bpf_map_peek_elem(data->map, val); + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int inner_map(void *ctx) +{ + struct bpf_map *inner_map; + struct callback_ctx data; + int key = 0; + + inner_map = bpf_map_lookup_elem(&outer_map, &key); + if (!inner_map) { + error |= 2; + return 0; + } + + data.map = inner_map; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int check_bloom(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&map_bloom; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} From 57fd1c63c9a687c5fdc86fa628c490d6733e8d0b Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 27 Oct 2021 16:45:03 -0700 Subject: [PATCH 148/181] bpf/benchs: Add benchmark tests for bloom filter throughput + false positive This patch adds benchmark tests for the throughput (for lookups + updates) and the false positive rate of bloom filter lookups, as well as some minor refactoring of the bash script for running the benchmarks. These benchmarks show that as the number of hash functions increases, the throughput and the false positive rate of the bloom filter decreases. 
>From the benchmark data, the approximate average false-positive rates are roughly as follows: 1 hash function = ~30% 2 hash functions = ~15% 3 hash functions = ~5% 4 hash functions = ~2.5% 5 hash functions = ~1% 6 hash functions = ~0.5% 7 hash functions = ~0.35% 8 hash functions = ~0.15% 9 hash functions = ~0.1% 10 hash functions = ~0% For reference data, the benchmarks run on one thread on a machine with one numa node for 1 to 5 hash functions for 8-byte and 64-byte values are as follows: 1 hash function: 50k entries 8-byte value Lookups - 51.1 M/s operations Updates - 33.6 M/s operations False positive rate: 24.15% 64-byte value Lookups - 15.7 M/s operations Updates - 15.1 M/s operations False positive rate: 24.2% 100k entries 8-byte value Lookups - 51.0 M/s operations Updates - 33.4 M/s operations False positive rate: 24.04% 64-byte value Lookups - 15.6 M/s operations Updates - 14.6 M/s operations False positive rate: 24.06% 500k entries 8-byte value Lookups - 50.5 M/s operations Updates - 33.1 M/s operations False positive rate: 27.45% 64-byte value Lookups - 15.6 M/s operations Updates - 14.2 M/s operations False positive rate: 27.42% 1 mil entries 8-byte value Lookups - 49.7 M/s operations Updates - 32.9 M/s operations False positive rate: 27.45% 64-byte value Lookups - 15.4 M/s operations Updates - 13.7 M/s operations False positive rate: 27.58% 2.5 mil entries 8-byte value Lookups - 47.2 M/s operations Updates - 31.8 M/s operations False positive rate: 30.94% 64-byte value Lookups - 15.3 M/s operations Updates - 13.2 M/s operations False positive rate: 30.95% 5 mil entries 8-byte value Lookups - 41.1 M/s operations Updates - 28.1 M/s operations False positive rate: 31.01% 64-byte value Lookups - 13.3 M/s operations Updates - 11.4 M/s operations False positive rate: 30.98% 2 hash functions: 50k entries 8-byte value Lookups - 34.1 M/s operations Updates - 20.1 M/s operations False positive rate: 9.13% 64-byte value Lookups - 8.4 M/s operations Updates - 7.9 M/s operations False positive rate: 9.21% 100k entries 8-byte value Lookups - 33.7 M/s operations Updates - 18.9 M/s operations False positive rate: 9.13% 64-byte value Lookups - 8.4 M/s operations Updates - 7.7 M/s operations False positive rate: 9.19% 500k entries 8-byte value Lookups - 32.7 M/s operations Updates - 18.1 M/s operations False positive rate: 12.61% 64-byte value Lookups - 8.4 M/s operations Updates - 7.5 M/s operations False positive rate: 12.61% 1 mil entries 8-byte value Lookups - 30.6 M/s operations Updates - 18.9 M/s operations False positive rate: 12.54% 64-byte value Lookups - 8.0 M/s operations Updates - 7.0 M/s operations False positive rate: 12.52% 2.5 mil entries 8-byte value Lookups - 25.3 M/s operations Updates - 16.7 M/s operations False positive rate: 16.77% 64-byte value Lookups - 7.9 M/s operations Updates - 6.5 M/s operations False positive rate: 16.88% 5 mil entries 8-byte value Lookups - 20.8 M/s operations Updates - 14.7 M/s operations False positive rate: 16.78% 64-byte value Lookups - 7.0 M/s operations Updates - 6.0 M/s operations False positive rate: 16.78% 3 hash functions: 50k entries 8-byte value Lookups - 25.1 M/s operations Updates - 14.6 M/s operations False positive rate: 7.65% 64-byte value Lookups - 5.8 M/s operations Updates - 5.5 M/s operations False positive rate: 7.58% 100k entries 8-byte value Lookups - 24.7 M/s operations Updates - 14.1 M/s operations False positive rate: 7.71% 64-byte value Lookups - 5.8 M/s operations Updates - 5.3 M/s operations False positive rate: 7.62% 
500k entries 8-byte value Lookups - 22.9 M/s operations Updates - 13.9 M/s operations False positive rate: 2.62% 64-byte value Lookups - 5.6 M/s operations Updates - 4.8 M/s operations False positive rate: 2.7% 1 mil entries 8-byte value Lookups - 19.8 M/s operations Updates - 12.6 M/s operations False positive rate: 2.60% 64-byte value Lookups - 5.3 M/s operations Updates - 4.4 M/s operations False positive rate: 2.69% 2.5 mil entries 8-byte value Lookups - 16.2 M/s operations Updates - 10.7 M/s operations False positive rate: 4.49% 64-byte value Lookups - 4.9 M/s operations Updates - 4.1 M/s operations False positive rate: 4.41% 5 mil entries 8-byte value Lookups - 18.8 M/s operations Updates - 9.2 M/s operations False positive rate: 4.45% 64-byte value Lookups - 5.2 M/s operations Updates - 3.9 M/s operations False positive rate: 4.54% 4 hash functions: 50k entries 8-byte value Lookups - 19.7 M/s operations Updates - 11.1 M/s operations False positive rate: 1.01% 64-byte value Lookups - 4.4 M/s operations Updates - 4.0 M/s operations False positive rate: 1.00% 100k entries 8-byte value Lookups - 19.5 M/s operations Updates - 10.9 M/s operations False positive rate: 1.00% 64-byte value Lookups - 4.3 M/s operations Updates - 3.9 M/s operations False positive rate: 0.97% 500k entries 8-byte value Lookups - 18.2 M/s operations Updates - 10.6 M/s operations False positive rate: 2.05% 64-byte value Lookups - 4.3 M/s operations Updates - 3.7 M/s operations False positive rate: 2.05% 1 mil entries 8-byte value Lookups - 15.5 M/s operations Updates - 9.6 M/s operations False positive rate: 1.99% 64-byte value Lookups - 4.0 M/s operations Updates - 3.4 M/s operations False positive rate: 1.99% 2.5 mil entries 8-byte value Lookups - 13.8 M/s operations Updates - 7.7 M/s operations False positive rate: 3.91% 64-byte value Lookups - 3.7 M/s operations Updates - 3.6 M/s operations False positive rate: 3.78% 5 mil entries 8-byte value Lookups - 13.0 M/s operations Updates - 6.9 M/s operations False positive rate: 3.93% 64-byte value Lookups - 3.5 M/s operations Updates - 3.7 M/s operations False positive rate: 3.39% 5 hash functions: 50k entries 8-byte value Lookups - 16.4 M/s operations Updates - 9.1 M/s operations False positive rate: 0.78% 64-byte value Lookups - 3.5 M/s operations Updates - 3.2 M/s operations False positive rate: 0.77% 100k entries 8-byte value Lookups - 16.3 M/s operations Updates - 9.0 M/s operations False positive rate: 0.79% 64-byte value Lookups - 3.5 M/s operations Updates - 3.2 M/s operations False positive rate: 0.78% 500k entries 8-byte value Lookups - 15.1 M/s operations Updates - 8.8 M/s operations False positive rate: 1.82% 64-byte value Lookups - 3.4 M/s operations Updates - 3.0 M/s operations False positive rate: 1.78% 1 mil entries 8-byte value Lookups - 13.2 M/s operations Updates - 7.8 M/s operations False positive rate: 1.81% 64-byte value Lookups - 3.2 M/s operations Updates - 2.8 M/s operations False positive rate: 1.80% 2.5 mil entries 8-byte value Lookups - 10.5 M/s operations Updates - 5.9 M/s operations False positive rate: 0.29% 64-byte value Lookups - 3.2 M/s operations Updates - 2.4 M/s operations False positive rate: 0.28% 5 mil entries 8-byte value Lookups - 9.6 M/s operations Updates - 5.7 M/s operations False positive rate: 0.30% 64-byte value Lookups - 3.2 M/s operations Updates - 2.7 M/s operations False positive rate: 0.30% Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: 
https://lore.kernel.org/bpf/20211027234504.30744-5-joannekoong@fb.com --- tools/testing/selftests/bpf/Makefile | 6 +- tools/testing/selftests/bpf/bench.c | 37 ++ tools/testing/selftests/bpf/bench.h | 3 + .../bpf/benchs/bench_bloom_filter_map.c | 420 ++++++++++++++++++ .../bpf/benchs/run_bench_bloom_filter_map.sh | 28 ++ .../bpf/benchs/run_bench_ringbufs.sh | 30 +- .../selftests/bpf/benchs/run_common.sh | 48 ++ .../selftests/bpf/progs/bloom_filter_bench.c | 153 +++++++ 8 files changed, 695 insertions(+), 30 deletions(-) create mode 100644 tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_bench.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ac47cf9760fc5..2c464cb78d6b3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -524,18 +524,20 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h +$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ - $(OUTPUT)/bench_ringbufs.o + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 6ea15b93a2f8a..a1d5dffe5ef61 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -51,6 +51,35 @@ void setup_libbpf() fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + long total = res->false_hits + res->hits + res->drops; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + + printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", + res->false_hits, total, ((float)res->false_hits / total) * 100); +} + +void false_hits_report_final(struct bench_res res[], int res_cnt) +{ + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + total_hits += res[i].hits; + total_false_hits += res[i].false_hits; + total_drops += res[i].drops; + } + total_ops = total_hits + total_false_hits + total_drops; + + printf("Summary: %ld false hits of %ld total operations. 
", + total_false_hits, total_ops); + printf("Percentage = %2.2f %%\n", + ((float)total_false_hits / total_ops) * 100); +} + void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) { double hits_per_sec, drops_per_sec; @@ -132,9 +161,11 @@ static const struct argp_option opts[] = { }; extern struct argp bench_ringbufs_argp; +extern struct argp bench_bloom_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, {}, }; @@ -323,6 +354,9 @@ extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; extern const struct bench bench_pb_custom; +extern const struct bench bench_bloom_lookup; +extern const struct bench bench_bloom_update; +extern const struct bench bench_bloom_false_positive; static const struct bench *benchs[] = { &bench_count_global, @@ -344,6 +378,9 @@ static const struct bench *benchs[] = { &bench_rb_custom, &bench_pb_libbpf, &bench_pb_custom, + &bench_bloom_lookup, + &bench_bloom_update, + &bench_bloom_false_positive, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index c1f48a473b022..624c6b11501f9 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -33,6 +33,7 @@ struct env { struct bench_res { long hits; long drops; + long false_hits; }; struct bench { @@ -56,6 +57,8 @@ extern const struct bench *bench; void setup_libbpf(); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); +void false_hits_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns() { struct timespec t; diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 0000000000000..4bafad418a8ae --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include +#include +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + bool use_array_map; + bool use_hashmap; + bool hashmap_use_bloom; + bool count_false_hits; + + struct bloom_filter_bench *skel; + + int bloom_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done_cv; + bool map_done; + bool map_prepare_err; + + __u32 next_map_idx; +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done_cv = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u8 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static 
error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_ENTRIES: + args.nr_entries = strtol(arg, NULL, 10); + if (args.nr_entries == 0) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + break; + case ARG_NR_HASH_FUNCS: + args.nr_hash_funcs = strtol(arg, NULL, 10); + if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } + break; + case ARG_VALUE_SIZE: + args.value_size = strtol(arg, NULL, 10); + if (args.value_size < 2 || args.value_size > 256) { + fprintf(stderr, + "Invalid value size. Must be between 2 and 256 bytes"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bloom_map_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, + "The bloom filter benchmarks do not support multi-consumer use\n"); + exit(1); + } +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +static void *map_prepare_thread(void *arg) +{ + __u32 val_size, i; + void *val = NULL; + int err; + + val_size = args.value_size; + val = malloc(val_size); + if (!val) { + ctx.map_prepare_err = true; + goto done; + } + + while (true) { + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); + if (i > args.nr_entries) + break; + +again: + /* Populate hashmap, bloom filter map, and array map with the same + * random values + */ + err = syscall(__NR_getrandom, val, val_size, 0); + if (err != val_size) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to get random value: %d\n", -errno); + break; + } + + if (ctx.use_hashmap) { + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); + if (err) { + if (err != -EEXIST) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to hashmap: %d\n", + -errno); + break; + } + goto again; + } + } + + i--; + + if (ctx.use_array_map) { + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to array map: %d\n", -errno); + break; + } + } + + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) + continue; + + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, + "failed to add elem to bloom filter map: %d\n", -errno); + break; + } + } +done: + pthread_mutex_lock(&ctx.map_done_mtx); + ctx.map_done = true; + pthread_cond_signal(&ctx.map_done_cv); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (val) + free(val); + + return NULL; +} + +static void populate_maps(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + pthread_t map_thread; + int i, err, nr_rand_bytes; + + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); + + for (i = 0; i < nr_cpus; i++) { + err = pthread_create(&map_thread, NULL, map_prepare_thread, + NULL); + if (err) { + fprintf(stderr, "failed to create pthread: %d\n", -errno); + exit(1); + } + } + + pthread_mutex_lock(&ctx.map_done_mtx); + while (!ctx.map_done) + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if 
(ctx.map_prepare_err) + exit(1); + + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, + ctx.skel->rodata->nr_rand_bytes, 0); + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { + fprintf(stderr, "failed to get random bytes\n"); + exit(1); + } +} + +static void check_args(void) +{ + if (args.value_size < 8) { + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); + + if (args.nr_entries > nr_unique_entries) { + fprintf(stderr, + "Not enough unique values for the nr_entries requested\n"); + exit(1); + } + } +} + +static struct bloom_filter_bench *setup_skeleton(void) +{ + struct bloom_filter_bench *skel; + + check_args(); + + setup_libbpf(); + + skel = bloom_filter_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; + skel->rodata->count_false_hits = ctx.count_false_hits; + + /* Resize number of entries */ + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); + + /* Set value size */ + bpf_map__set_value_size(skel->maps.array_map, args.value_size); + + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); + + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); + + /* For the hashmap, we use the value as the key as well */ + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); + + skel->bss->value_size = args.value_size; + + /* Set number of hash functions */ + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); + + if (bloom_filter_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static void bloom_lookup_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void bloom_update_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_update); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void false_positive_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + ctx.count_false_hits = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; + static unsigned long last_hits, last_drops, last_false_hits; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int hit_key, drop_key, false_hit_key; + int i; + + hit_key = ctx.skel->rodata->hit_key; + drop_key = ctx.skel->rodata->drop_key; + false_hit_key = ctx.skel->rodata->false_hit_key; + + if (ctx.skel->bss->error != 0) { + fprintf(stderr, "error (%d) when searching the bloom filter\n", + ctx.skel->bss->error); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) { + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; + + total_hits += s->stats[hit_key]; + total_drops += s->stats[drop_key]; + total_false_hits += s->stats[false_hit_key]; + } + + res->hits = total_hits - 
last_hits; + res->drops = total_drops - last_drops; + res->false_hits = total_false_hits - last_false_hits; + + last_hits = total_hits; + last_drops = total_drops; + last_false_hits = total_false_hits; +} + +static void *consumer(void *input) +{ + return NULL; +} + +const struct bench bench_bloom_lookup = { + .name = "bloom-lookup", + .validate = validate, + .setup = bloom_lookup_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_update = { + .name = "bloom-update", + .validate = validate, + .setup = bloom_update_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_false_positive = { + .name = "bloom-false-positive", + .validate = validate, + .setup = false_positive_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = false_hits_report_progress, + .report_final = false_hits_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh new file mode 100755 index 0000000000000..d03d0e5c91cd7 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Bloom filter map" +for v in 2 4 8 16 40; do +for t in 1 4 8 12 16; do +for h in {1..10}; do +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize "Lookups, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" + printf "\t" + summarize "Updates, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" + printf "\t" + summarize_percentage "False positive rate: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" + done + printf "\n" +done +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index af4aa04caba6a..ada028aa90073 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -1,34 +1,8 @@ #!/bin/bash -set -eufo pipefail - -RUN_BENCH="sudo ./bench -w3 -d10 -a" - -function hits() -{ - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} - -function drops() -{ - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} +source ./benchs/run_common.sh -function header() -{ - local len=${#1} - - printf "\n%s\n" "$1" - for i in $(seq 1 $len); do printf '='; done - printf '\n' -} - -function summarize() -{ - bench="$1" - summary=$(echo $2 | tail -n1) - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" -} +set -eufo pipefail header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh new file mode 100644 index 
0000000000000..670f23b037c43 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function subtitle() +{ + local len=${#1} + printf "\t%s\n" "$1" +} + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function percentage() +{ + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +function summarize_percentage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" +} diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c new file mode 100644 index 0000000000000..d9a88dd1ea656 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +__u8 rand_vals[2500000]; +const __u32 nr_rand_bytes = 2500000; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(__u32)); + /* max entries and value_size will be set programmatically. + * They are configurable from the userspace bench program. + */ +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + /* max entries, value_size, and # of hash functions will be set + * programmatically. They are configurable from the userspace + * bench program. + */ + __uint(map_extra, 3); +} bloom_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + /* max entries, key_size, and value_size, will be set + * programmatically. They are configurable from the userspace + * bench program. 
+ */ +} hashmap SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; + bool update; +}; + +/* Tracks the number of hits, drops, and false hits */ +struct { + __u32 stats[3]; +} __attribute__((__aligned__(256))) percpu_stats[256]; + +const __u32 hit_key = 0; +const __u32 drop_key = 1; +const __u32 false_hit_key = 2; + +__u8 value_size; + +const volatile bool hashmap_use_bloom; +const volatile bool count_false_hits; + +int error = 0; + +static __always_inline void log_result(__u32 key) +{ + __u32 cpu = bpf_get_smp_processor_id(); + + percpu_stats[cpu & 255].stats[key]++; +} + +static __u64 +bloom_callback(struct bpf_map *map, __u32 *key, void *val, + struct callback_ctx *data) +{ + int err; + + if (data->update) + err = bpf_map_push_elem(data->map, val, 0); + else + err = bpf_map_peek_elem(data->map, val); + + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + log_result(hit_key); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_lookup(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = false; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_update(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = true; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_hashmap_lookup(void *ctx) +{ + __u64 *result; + int i, err; + + __u32 index = bpf_get_prandom_u32(); + __u32 bitmask = (1ULL << 21) - 1; + + for (i = 0; i < 1024; i++, index += value_size) { + index = index & bitmask; + + if (hashmap_use_bloom) { + err = bpf_map_peek_elem(&bloom_map, + rand_vals + index); + if (err) { + if (err != -ENOENT) { + error |= 2; + return 0; + } + log_result(hit_key); + continue; + } + } + + result = bpf_map_lookup_elem(&hashmap, + rand_vals + index); + if (result) { + log_result(hit_key); + } else { + if (hashmap_use_bloom && count_false_hits) + log_result(false_hit_key); + log_result(drop_key); + } + } + + return 0; +} From f44bc543a079c2ebc534cbfabd6fbfcfc2b09f72 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 27 Oct 2021 16:45:04 -0700 Subject: [PATCH 149/181] bpf/benchs: Add benchmarks for comparing hashmap lookups w/ vs. w/out bloom filter This patch adds benchmark tests for comparing the performance of hashmap lookups without the bloom filter vs. hashmap lookups with the bloom filter. Checking the bloom filter first for whether the element exists should overall enable a higher throughput for hashmap lookups, since if the element does not exist in the bloom filter, we can avoid a costly lookup in the hashmap. On average, using 5 hash functions in the bloom filter tended to perform the best across the widest range of different entry sizes. 
The benchmark results using 5 hash functions (running on 8 threads on a machine with one numa node, and taking the average of 3 runs) were roughly as follows: value_size = 4 bytes - 10k entries: 30% faster 50k entries: 40% faster 100k entries: 40% faster 500k entres: 70% faster 1 million entries: 90% faster 5 million entries: 140% faster value_size = 8 bytes - 10k entries: 30% faster 50k entries: 40% faster 100k entries: 50% faster 500k entres: 80% faster 1 million entries: 100% faster 5 million entries: 150% faster value_size = 16 bytes - 10k entries: 20% faster 50k entries: 30% faster 100k entries: 35% faster 500k entres: 65% faster 1 million entries: 85% faster 5 million entries: 110% faster value_size = 40 bytes - 10k entries: 5% faster 50k entries: 15% faster 100k entries: 20% faster 500k entres: 65% faster 1 million entries: 75% faster 5 million entries: 120% faster Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211027234504.30744-6-joannekoong@fb.com --- tools/testing/selftests/bpf/bench.c | 23 ++++++-- .../bpf/benchs/bench_bloom_filter_map.c | 57 +++++++++++++++++++ .../bpf/benchs/run_bench_bloom_filter_map.sh | 17 ++++++ .../selftests/bpf/benchs/run_common.sh | 12 ++++ 4 files changed, 104 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index a1d5dffe5ef61..cc4722f693e9a 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -92,20 +92,22 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); - printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", - hits_per_sec, hits_per_prod, drops_per_sec); + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n", + hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; - double hits_mean = 0.0, drops_mean = 0.0; - double hits_stddev = 0.0, drops_stddev = 0.0; + double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0; + double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0; + double total_ops; for (i = 0; i < res_cnt; i++) { hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); } + total_ops_mean = hits_mean + drops_mean; if (res_cnt > 1) { for (i = 0; i < res_cnt; i++) { @@ -115,14 +117,21 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) drops_stddev += (drops_mean - res[i].drops / 1000000.0) * (drops_mean - res[i].drops / 1000000.0) / (res_cnt - 1.0); + total_ops = res[i].hits + res[i].drops; + total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) * + (total_ops_mean - total_ops / 1000000.0) / + (res_cnt - 1.0); } hits_stddev = sqrt(hits_stddev); drops_stddev = sqrt(drops_stddev); + total_ops_stddev = sqrt(total_ops_stddev); } printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", hits_mean, hits_stddev, hits_mean / env.producer_cnt); - printf("drops %8.3lf \u00B1 %5.3lfM/s\n", + printf("drops %8.3lf \u00B1 %5.3lfM/s, ", drops_mean, drops_stddev); + printf("total operations %8.3lf \u00B1 %5.3lfM/s\n", + total_ops_mean, total_ops_stddev); } const char *argp_program_version = "benchmark"; @@ -357,6 +366,8 @@ extern const struct bench bench_pb_custom; extern const struct bench bench_bloom_lookup; extern const struct bench bench_bloom_update; 
extern const struct bench bench_bloom_false_positive; +extern const struct bench bench_hashmap_without_bloom; +extern const struct bench bench_hashmap_with_bloom; static const struct bench *benchs[] = { &bench_count_global, @@ -381,6 +392,8 @@ static const struct bench *benchs[] = { &bench_bloom_lookup, &bench_bloom_update, &bench_bloom_false_positive, + &bench_hashmap_without_bloom, + &bench_hashmap_with_bloom, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c index 4bafad418a8ae..6eeeed2913e62 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -346,6 +346,41 @@ static void false_positive_setup(void) } } +static void hashmap_with_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_no_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + static void measure(struct bench_res *res) { unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; @@ -418,3 +453,25 @@ const struct bench bench_bloom_false_positive = { .report_progress = false_hits_report_progress, .report_final = false_hits_report_final, }; + +const struct bench bench_hashmap_without_bloom = { + .name = "hashmap-without-bloom", + .validate = validate, + .setup = hashmap_no_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_hashmap_with_bloom = { + .name = "hashmap-with-bloom", + .validate = validate, + .setup = hashmap_with_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh index d03d0e5c91cd7..8ffd385ab2f42 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -26,3 +26,20 @@ subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" done done done + +header "Hashmap without bloom filter vs. 
hashmap with bloom filter (throughput, 8 threads)" +for v in 2 4 8 16 40; do +for h in {1..10}; do +subtitle "value_size: $v, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize_total "Hashmap without bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" + printf "\t" + summarize_total "Hashmap with bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" + done + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 670f23b037c43..9a16be78b1809 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -33,6 +33,11 @@ function percentage() echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" } +function total() +{ + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + function summarize() { bench="$1" @@ -46,3 +51,10 @@ function summarize_percentage() summary=$(echo $2 | tail -n1) printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" } + +function summarize_total() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(total $summary)" +} From d6aef08a872b9e23eecc92d0e92393473b13c497 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:54 +0530 Subject: [PATCH 150/181] bpf: Add bpf_kallsyms_lookup_name helper This helper allows us to get the address of a kernel symbol from inside a BPF_PROG_TYPE_SYSCALL prog (used by gen_loader), so that we can relocate typeless ksym vars. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211028063501.2239335-2-memxor@gmail.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 16 ++++++++++++++++ kernel/bpf/syscall.c | 27 +++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50105e0b8fcc0..6deebf8bf78fb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2110,6 +2110,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8bead4aa3ad04..bd0c9f0487f6c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4923,6 +4923,21 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *unix_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. 
+ * + * **-EPERM** if caller does not have permission to obtain kernel address. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5104,6 +5119,7 @@ union bpf_attr { FN(get_branch_snapshot), \ FN(trace_vprintk), \ FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f7c2c6354add9..e12a217ead34a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4781,6 +4781,31 @@ static const struct bpf_func_proto bpf_sys_close_proto = { .arg1_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) +{ + if (flags) + return -EINVAL; + + if (name_sz <= 1 || name[name_sz - 1]) + return -EINVAL; + + if (!bpf_dump_raw_ok(current_cred())) + return -EPERM; + + *res = kallsyms_lookup_name(name); + return *res ? 0 : -ENOENT; +} + +const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { + .func = bpf_kallsyms_lookup_name, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; + static const struct bpf_func_proto * syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -4791,6 +4816,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: return &bpf_sys_close_proto; + case BPF_FUNC_kallsyms_lookup_name: + return &bpf_kallsyms_lookup_name_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8bead4aa3ad04..bd0c9f0487f6c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4923,6 +4923,21 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *unix_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5104,6 +5119,7 @@ union bpf_attr { FN(get_branch_snapshot), \ FN(trace_vprintk), \ FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper From c24941cd3766b6de682dbe6809bd6af12271ab5b Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:55 +0530 Subject: [PATCH 151/181] libbpf: Add typeless ksym support to gen_loader This uses the bpf_kallsyms_lookup_name helper added in previous patches to relocate typeless ksyms. The return value ENOENT can be ignored, and the value written to 'res' can be directly stored to the insn, as it is overwritten to 0 on lookup failure. For repeating symbols, we can simply copy the previously populated bpf_insn. Also, we need to take care to not close fds for typeless ksym_desc, so reuse the 'off' member's space to add a marker for typeless ksym and use that to skip them in cleanup_relos. 
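Conceptually, the instructions emitted for a typeless ksym behave like the hypothetical BPF_PROG_TYPE_SYSCALL program below. The symbol name, the target_insn slot, and the assumption that bpf_helpers.h was regenerated against the updated UAPI header are all illustrative, not taken from the patch:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

char sym_name[] = "bpf_fentry_test1";	/* example symbol, assumed */
struct bpf_insn target_insn[2];		/* the ldimm64 pair to patch */

SEC("syscall")
int resolve_typeless_ksym(void *ctx)
{
	__u64 addr = 0;

	/* -ENOENT is deliberately ignored: addr stays 0 on lookup failure,
	 * which is exactly what gets written for a missing/weak ksym.
	 */
	bpf_kallsyms_lookup_name(sym_name, sizeof(sym_name), 0, &addr);

	target_insn[0].imm = (__u32)addr;		/* lower 32 bits */
	target_insn[1].imm = (__u32)(addr >> 32);	/* upper 32 bits */
	return 0;
}
```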
We add a emit_ksym_relo_log helper that avoids duplicating common logging instructions between typeless and weak ksym (for future commit). Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211028063501.2239335-3-memxor@gmail.com --- tools/lib/bpf/bpf_gen_internal.h | 12 +++- tools/lib/bpf/gen_loader.c | 97 ++++++++++++++++++++++++++++---- tools/lib/bpf/libbpf.c | 13 ++--- 3 files changed, 99 insertions(+), 23 deletions(-) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index b8d41d6fbc407..d26e5472fe501 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -8,13 +8,19 @@ struct ksym_relo_desc { int kind; int insn_idx; bool is_weak; + bool is_typeless; }; struct ksym_desc { const char *name; int ref; int kind; - int off; + union { + /* used for kfunc */ + int off; + /* used for typeless ksym */ + bool typeless; + }; int insn; }; @@ -49,7 +55,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_a void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind, - int insn_idx); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx); #endif diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index e552484ae4a48..bae065a9dbeca 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -560,7 +560,7 @@ static void emit_find_attach_target(struct bpf_gen *gen) } void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - int kind, int insn_idx) + bool is_typeless, int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -573,6 +573,7 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, relo += gen->relo_cnt; relo->name = name; relo->is_weak = is_weak; + relo->is_typeless = is_typeless; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; @@ -622,6 +623,29 @@ static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_des debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); } +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + * Returns u64 symbol addr in BPF_REG_9 + */ +static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1, res_off; + + name_off = add_data(gen, relo->name, len); + res_off = add_data(gen, NULL, 8); /* res is u64 */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, name_off)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, res_off)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name)); + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind); +} + /* Expects: * BPF_REG_8 - pointer to instruction * @@ -701,6 +725,58 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo relo->name, 
kdesc->ref); } +static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo, + int ref) +{ + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + + offsetof(struct bpf_insn, imm))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d", + relo->is_typeless, relo->is_weak, relo->name, ref); + emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); + debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg", + relo->is_typeless, relo->is_weak, relo->name, ref); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_typeless(struct bpf_gen *gen, + struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + goto log; + } + /* remember insn offset, so we can copy ksym addr later */ + kdesc->insn = insn; + /* skip typeless ksym_desc in fd closing loop in cleanup_relos */ + kdesc->typeless = true; + emit_bpf_kallsyms_lookup_name(gen, relo); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1)); + emit_check_err(gen); + /* store lower half of addr into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm))); + /* store upper half of addr into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); +log: + emit_ksym_relo_log(gen, relo, kdesc->ref); +} + /* Expects: * BPF_REG_8 - pointer to instruction */ @@ -730,14 +806,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); log: - if (!gen->log_level) - return; - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, - offsetof(struct bpf_insn, imm))); - emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + - offsetof(struct bpf_insn, imm))); - debug_regs(gen, BPF_REG_7, BPF_REG_9, " var (%s:count=%d): imm: %%d, fd: %%d", - relo->name, kdesc->ref); + emit_ksym_relo_log(gen, relo, kdesc->ref); } static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) @@ -749,7 +818,10 @@ static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); switch (relo->kind) { case BTF_KIND_VAR: - emit_relo_ksym_btf(gen, relo, insn); + if (relo->is_typeless) + emit_relo_ksym_typeless(gen, relo, insn); + else + emit_relo_ksym_btf(gen, relo, insn); break; case BTF_KIND_FUNC: emit_relo_kfunc_btf(gen, relo, insn); @@ -774,12 +846,13 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) int i, insn; for (i = 0; i < gen->nr_ksyms; i++) { - if (gen->ksyms[i].kind == BTF_KIND_VAR) { + /* only close fds for typed ksyms and kfuncs */ + if (gen->ksyms[i].kind == BTF_KIND_VAR && 
!gen->ksyms[i].typeless) { /* close fd recorded in insn[insn_idx + 1].imm */ insn = gen->ksyms[i].insn; insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); emit_sys_close_blob(gen, insn); - } else { /* BTF_KIND_FUNC */ + } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) gen->nr_fd_array--; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 232e4efe82e90..73e1e70a722f6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6575,17 +6575,14 @@ static int bpf_program__record_externs(struct bpf_program *prog) case RELO_EXTERN_VAR: if (ext->type != EXT_KSYM) continue; - if (!ext->ksym.type_id) { - pr_warn("typeless ksym %s is not supported yet\n", - ext->name); - return -ENOTSUP; - } - bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, + bpf_gen__record_extern(obj->gen_loader, ext->name, + ext->is_weak, !ext->ksym.type_id, BTF_KIND_VAR, relo->insn_idx); break; case RELO_EXTERN_FUNC: - bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, - BTF_KIND_FUNC, relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, + ext->is_weak, false, BTF_KIND_FUNC, + relo->insn_idx); break; default: continue; From 585a3571981d8a93e2211e1ac835d31a63e68cd8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:56 +0530 Subject: [PATCH 152/181] libbpf: Add weak ksym support to gen_loader This extends existing ksym relocation code to also support relocating weak ksyms. Care needs to be taken to zero out the src_reg (currently BPF_PSEUOD_BTF_ID, always set for gen_loader by bpf_object__relocate_data) when the BTF ID lookup fails at runtime. This is not a problem for libbpf as it only sets ext->is_set when BTF ID lookup succeeds (and only proceeds in case of failure if ext->is_weak, leading to src_reg remaining as 0 for weak unresolved ksym). A pattern similar to emit_relo_kfunc_btf is followed of first storing the default values and then jumping over actual stores in case of an error. For src_reg adjustment, we also need to perform it when copying the populated instruction, so depending on if copied insn[0].imm is 0 or not, we decide to jump over the adjustment. We cannot reach that point unless the ksym was weak and resolved and zeroed out, as the emit_check_err will cause us to jump to cleanup label, so we do not need to recheck whether the ksym is weak before doing the adjustment after copying BTF ID and BTF FD. This is consistent with how libbpf relocates weak ksym. Logging statements are added to show the relocation result and aid debugging. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211028063501.2239335-4-memxor@gmail.com --- tools/lib/bpf/gen_loader.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index bae065a9dbeca..502dea53a742d 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -13,6 +13,7 @@ #include "hashmap.h" #include "bpf_gen_internal.h" #include "skel_internal.h" +#include #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 @@ -777,12 +778,24 @@ static void emit_relo_ksym_typeless(struct bpf_gen *gen, emit_ksym_relo_log(gen, relo, kdesc->ref); } +static __u32 src_reg_mask(void) +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + return 0x0f; /* src_reg,dst_reg,... 
*/ +#elif defined(__BIG_ENDIAN_BITFIELD) + return 0xf0; /* dst_reg,src_reg,... */ +#else +#error "Unsupported bit endianness, cannot proceed" +#endif +} + /* Expects: * BPF_REG_8 - pointer to instruction */ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) { struct ksym_desc *kdesc; + __u32 reg_mask; kdesc = get_ksym_desc(gen, relo); if (!kdesc) @@ -793,19 +806,35 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, kdesc->insn + offsetof(struct bpf_insn, imm)); move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - goto log; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); + /* jump over src_reg adjustment if imm is not 0 */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); + goto clear_src_reg; } /* remember insn offset, so we can copy BTF ID and FD later */ kdesc->insn = insn; emit_bpf_find_by_name_kind(gen, relo); - emit_check_err(gen); + if (!relo->is_weak) + emit_check_err(gen); + /* set default values as 0 */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); + /* skip success case stores if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); /* store btf_id into insn[insn_idx].imm */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); /* store btf_obj_fd into insn[insn_idx + 1].imm */ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); -log: + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); +clear_src_reg: + /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ + reg_mask = src_reg_mask(); + emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); + emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); + emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); + emit_ksym_relo_log(gen, relo, kdesc->ref); } From 549a63238603103fa33cecd49487cf6c0f52e503 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:57 +0530 Subject: [PATCH 153/181] libbpf: Ensure that BPF syscall fds are never 0, 1, or 2 Add a simple wrapper for passing an fd and getting a new one >= 3 if it is one of 0, 1, or 2. There are two primary reasons to make this change: First, libbpf relies on the assumption a certain BPF fd is never 0 (e.g. most recently noticed in [0]). Second, Alexei pointed out in [1] that some environments reset stdin, stdout, and stderr if they notice an invalid fd at these numbers. To protect against both these cases, switch all internal BPF syscall wrappers in libbpf to always return an fd >= 3. We only need to modify the syscall wrappers and not other code that assumes a valid fd by doing >= 0, to avoid pointless churn, and because it is still a valid assumption. The cost paid is two additional syscalls if fd is in range [0, 2]. 
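A standalone illustration of the same F_DUPFD_CLOEXEC trick (demo code, not taken from the patch) is:

```c
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Move an fd out of the stdio range, mirroring the wrapper's behaviour:
 * this function takes ownership of the original fd and closes it.
 */
static int dup_above_stdio(int fd)
{
	int new_fd, saved_errno;

	if (fd < 0 || fd > 2)
		return fd;
	new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);	/* lowest free fd >= 3 */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return new_fd;
}

int main(void)
{
	close(0);	/* simulate an environment where stdin was closed */
	int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);	/* likely fd 0 */

	printf("raw fd %d remapped to fd %d\n", fd, dup_above_stdio(fd));
	return 0;
}
```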
[0]: e31eec77e4ab ("bpf: selftests: Fix fd cleanup in get_branch_snapshot") [1]: https://lore.kernel.org/bpf/CAADnVQKVKY8o_3aU8Gzke443+uHa-eGoM0h7W4srChMXU1S4Bg@mail.gmail.com Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211028063501.2239335-5-memxor@gmail.com --- tools/lib/bpf/bpf.c | 35 +++++++++++++++++++++------------ tools/lib/bpf/libbpf_internal.h | 24 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index fe4b6ebc9b8fc..c09cbb868c9f1 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -65,13 +65,22 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + int fd; + + fd = sys_bpf(cmd, attr, size); + return ensure_good_fd(fd); +} + static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) { int retries = 5; int fd; do { - fd = sys_bpf(BPF_PROG_LOAD, attr, size); + fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); } while (fd < 0 && errno == EAGAIN && retries-- > 0); return fd; @@ -104,7 +113,7 @@ int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr attr.inner_map_fd = create_attr->inner_map_fd; attr.map_extra = create_attr->map_extra; - fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -206,7 +215,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.numa_node = node; } - fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -634,7 +643,7 @@ int bpf_obj_get(const char *pathname) memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); - fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -745,7 +754,7 @@ int bpf_link_create(int prog_fd, int target_fd, break; } proceed: - fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -788,7 +797,7 @@ int bpf_iter_create(int link_fd) memset(&attr, 0, sizeof(attr)); attr.iter_create.link_fd = link_fd; - fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -946,7 +955,7 @@ int bpf_prog_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.prog_id = id; - fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -958,7 +967,7 @@ int bpf_map_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.map_id = id; - fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -970,7 +979,7 @@ int bpf_btf_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.btf_id = id; - fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -982,7 +991,7 @@ int bpf_link_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.link_id = id; - fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); 
+ fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1013,7 +1022,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1033,7 +1042,7 @@ int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_s attr.btf_log_buf = ptr_to_u64(log_buf); } - fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; @@ -1075,7 +1084,7 @@ int bpf_enable_stats(enum bpf_stats_type type) memset(&attr, 0, sizeof(attr)); attr.enable_stats.type = type; - fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index a6dd7e25747f3..aeb79e3a8ff99 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "libbpf_legacy.h" #include "relo_core.h" @@ -491,4 +493,26 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 + * Takes ownership of the fd passed in, and closes it if calling + * fcntl(fd, F_DUPFD_CLOEXEC, 3). + */ +static inline int ensure_good_fd(int fd) +{ + int old_fd = fd, saved_errno; + + if (fd < 0) + return fd; + if (fd < 3) { + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + saved_errno = errno; + close(old_fd); + if (fd < 0) { + pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno); + errno = saved_errno; + } + } + return fd; +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ From 92274e24b01b331ef7a4227135933e6163fe94aa Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:58 +0530 Subject: [PATCH 154/181] libbpf: Use O_CLOEXEC uniformly when opening fds There are some instances where we don't use O_CLOEXEC when opening an fd, fix these up. Otherwise, it is possible that a parallel fork causes these fds to leak into a child process on execve. 
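As a minimal illustration (an editorial sketch, not part of this patch; the file path and the exec'ed binary are made up), an fd opened without O_CLOEXEC stays open across execve() in a child created by a parallel fork, while an O_CLOEXEC fd is closed automatically:

  #include <fcntl.h>
  #include <unistd.h>

  int main(void)
  {
          /* hypothetical file, used only for illustration */
          int leaky = open("/tmp/example.btf", O_RDONLY);             /* may leak into children */
          int safe  = open("/tmp/example.btf", O_RDONLY | O_CLOEXEC); /* closed on execve() */

          if (fork() == 0) {
                  /* "leaky" survives the execve() below; "safe" does not */
                  execl("/bin/true", "true", (char *)NULL);
                  _exit(1);
          }
          close(leaky);
          close(safe);
          return 0;
  }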
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211028063501.2239335-6-memxor@gmail.com --- tools/lib/bpf/btf.c | 2 +- tools/lib/bpf/libbpf.c | 6 +++--- tools/lib/bpf/libbpf_probes.c | 2 +- tools/lib/bpf/linker.c | 4 ++-- tools/lib/bpf/xsk.c | 6 +++--- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 0c628c33e23b7..7e4c5586bd877 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -897,7 +897,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("failed to open %s: %s\n", path, strerror(errno)); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 73e1e70a722f6..742d1a388179a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1232,7 +1232,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) */ elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { - obj->efile.fd = open(obj->path, O_RDONLY); + obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { char errmsg[STRERR_BUFSIZE], *cp; @@ -9615,7 +9615,7 @@ static int append_to_file(const char *file, const char *fmt, ...) int fd, n, err = 0; va_list ap; - fd = open(file, O_WRONLY | O_APPEND, 0); + fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); if (fd < 0) return -errno; @@ -11260,7 +11260,7 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) int fd, err = 0, len; char buf[128]; - fd = open(fcpu, O_RDONLY); + fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index cd8c703dde718..68f2dbf364aa0 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -33,7 +33,7 @@ static int get_vendor_id(int ifindex) snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) return -1; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index ce0800e61dc71..f677dccdeae44 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -301,7 +301,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) if (!linker->filename) return -ENOMEM; - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644); + linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (linker->fd < 0) { err = -errno; pr_warn("failed to create '%s': %d\n", file, err); @@ -556,7 +556,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->filename = filename; - obj->fd = open(filename, O_RDONLY); + obj->fd = open(filename, O_RDONLY | O_CLOEXEC); if (obj->fd < 0) { err = -errno; pr_warn("failed to open file '%s': %d\n", filename, err); diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a2111696ba916..81f8fbc85e70d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -300,7 +300,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, if (!umem) return -ENOMEM; - umem->fd = socket(AF_XDP, SOCK_RAW, 0); + umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (umem->fd < 0) { err = -errno; goto out_umem_alloc; @@ -549,7 +549,7 @@ static int 
xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_LOCAL, SOCK_DGRAM, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (fd < 0) return -errno; @@ -1046,7 +1046,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, } if (umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (xsk->fd < 0) { err = -errno; goto out_xsk_alloc; From 087cba799ced0573df499ddd3b2d8777e50cfb62 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:59 +0530 Subject: [PATCH 155/181] selftests/bpf: Add weak/typeless ksym test for light skeleton Also, avoid using CO-RE features, as lskel doesn't support CO-RE, yet. Include both light and libbpf skeleton in same file to test both of them together. In c48e51c8b07a ("bpf: selftests: Add selftests for module kfunc support"), I added support for generating both lskel and libbpf skel for a BPF object, however the name parameter for bpftool caused collisions when included in same file together. This meant that every test needed a separate file for a libbpf/light skeleton separation instead of subtests. Change that by appending a "_lskel" suffix to the name for files using light skeleton, and convert all existing users. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211028063501.2239335-7-memxor@gmail.com --- tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/prog_tests/atomics.c | 34 ++++++++-------- .../selftests/bpf/prog_tests/fentry_fexit.c | 16 ++++---- .../selftests/bpf/prog_tests/fentry_test.c | 14 +++---- .../selftests/bpf/prog_tests/fexit_sleep.c | 12 +++--- .../selftests/bpf/prog_tests/fexit_test.c | 14 +++---- .../selftests/bpf/prog_tests/kfunc_call.c | 6 +-- .../selftests/bpf/prog_tests/ksyms_btf.c | 35 +++++++++++++++- .../selftests/bpf/prog_tests/ksyms_module.c | 40 +++++++++++++++++-- .../bpf/prog_tests/ksyms_module_libbpf.c | 28 ------------- .../selftests/bpf/prog_tests/ringbuf.c | 12 +++--- .../selftests/bpf/prog_tests/trace_printk.c | 14 +++---- .../selftests/bpf/prog_tests/trace_vprintk.c | 12 +++--- .../selftests/bpf/prog_tests/verif_stats.c | 6 +-- .../selftests/bpf/progs/test_ksyms_weak.c | 2 +- 15 files changed, 142 insertions(+), 107 deletions(-) delete mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2c464cb78d6b3..f49cb5fc85af8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -325,7 +325,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c # Generate both light skeleton and libbpf skeleton for these -LSKELS_EXTRA := test_ksyms_module.c +LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c SKEL_BLACKLIST += $$(LSKELS) test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o @@ -404,7 +404,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) - $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ + $(Q)$$(BPFTOOL) gen skeleton -L 
$$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 1486be5d3209f..0f9525293881b 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -4,13 +4,13 @@ #include "atomics.lskel.h" -static void test_add(struct atomics *skel) +static void test_add(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__add__attach(skel); + link_fd = atomics_lskel__add__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; @@ -36,13 +36,13 @@ static void test_add(struct atomics *skel) close(link_fd); } -static void test_sub(struct atomics *skel) +static void test_sub(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__sub__attach(skel); + link_fd = atomics_lskel__sub__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; @@ -69,13 +69,13 @@ static void test_sub(struct atomics *skel) close(link_fd); } -static void test_and(struct atomics *skel) +static void test_and(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__and__attach(skel); + link_fd = atomics_lskel__and__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; @@ -97,13 +97,13 @@ static void test_and(struct atomics *skel) close(link_fd); } -static void test_or(struct atomics *skel) +static void test_or(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__or__attach(skel); + link_fd = atomics_lskel__or__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; @@ -126,13 +126,13 @@ static void test_or(struct atomics *skel) close(link_fd); } -static void test_xor(struct atomics *skel) +static void test_xor(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__xor__attach(skel); + link_fd = atomics_lskel__xor__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; @@ -154,13 +154,13 @@ static void test_xor(struct atomics *skel) close(link_fd); } -static void test_cmpxchg(struct atomics *skel) +static void test_cmpxchg(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__cmpxchg__attach(skel); + link_fd = atomics_lskel__cmpxchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; @@ -183,13 +183,13 @@ static void test_cmpxchg(struct atomics *skel) close(link_fd); } -static void test_xchg(struct atomics *skel) +static void test_xchg(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__xchg__attach(skel); + link_fd = atomics_lskel__xchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; @@ -212,10 +212,10 @@ static void test_xchg(struct atomics *skel) void test_atomics(void) { - struct atomics *skel; + struct atomics_lskel *skel; __u32 duration = 0; - skel = atomics__open_and_load(); + skel = atomics_lskel__open_and_load(); if (CHECK(!skel, "skel_load", "atomics skeleton failed\n")) return; @@ -243,5 +243,5 @@ void test_atomics(void) test_xchg(skel); cleanup: - atomics__destroy(skel); + atomics_lskel__destroy(skel); } diff --git 
a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 91154c2ba256d..4374ac8a8a915 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -6,23 +6,23 @@ void test_fentry_fexit(void) { - struct fentry_test *fentry_skel = NULL; - struct fexit_test *fexit_skel = NULL; + struct fentry_test_lskel *fentry_skel = NULL; + struct fexit_test_lskel *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; int err, prog_fd, i; - fentry_skel = fentry_test__open_and_load(); + fentry_skel = fentry_test_lskel__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; - fexit_skel = fexit_test__open_and_load(); + fexit_skel = fexit_test_lskel__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto close_prog; - err = fentry_test__attach(fentry_skel); + err = fentry_test_lskel__attach(fentry_skel); if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto close_prog; - err = fexit_test__attach(fexit_skel); + err = fexit_test_lskel__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; @@ -44,6 +44,6 @@ void test_fentry_fexit(void) } close_prog: - fentry_test__destroy(fentry_skel); - fexit_test__destroy(fexit_skel); + fentry_test_lskel__destroy(fentry_skel); + fexit_test_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 174c89e7456e5..12921b3850d20 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -3,19 +3,19 @@ #include #include "fentry_test.lskel.h" -static int fentry_test(struct fentry_test *fentry_skel) +static int fentry_test(struct fentry_test_lskel *fentry_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fentry_test__attach(fentry_skel); + err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) return err; /* Check that already linked program can't be attached again. 
*/ - link_fd = fentry_test__test1__attach(fentry_skel); + link_fd = fentry_test_lskel__test1__attach(fentry_skel); if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fentry_test(struct fentry_test *fentry_skel) return -1; } - fentry_test__detach(fentry_skel); + fentry_test_lskel__detach(fentry_skel); /* zero results for re-attach test */ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss)); @@ -40,10 +40,10 @@ static int fentry_test(struct fentry_test *fentry_skel) void test_fentry_test(void) { - struct fentry_test *fentry_skel = NULL; + struct fentry_test_lskel *fentry_skel = NULL; int err; - fentry_skel = fentry_test__open_and_load(); + fentry_skel = fentry_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fentry_test(void) ASSERT_OK(err, "fentry_second_attach"); cleanup: - fentry_test__destroy(fentry_skel); + fentry_test_lskel__destroy(fentry_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index 4e7f4b42ea298..f949647dbbc21 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -10,7 +10,7 @@ static int do_sleep(void *skel) { - struct fexit_sleep *fexit_skel = skel; + struct fexit_sleep_lskel *fexit_skel = skel; struct timespec ts1 = { .tv_nsec = 1 }; struct timespec ts2 = { .tv_sec = 10 }; @@ -25,16 +25,16 @@ static char child_stack[STACK_SIZE]; void test_fexit_sleep(void) { - struct fexit_sleep *fexit_skel = NULL; + struct fexit_sleep_lskel *fexit_skel = NULL; int wstatus, duration = 0; pid_t cpid; int err, fexit_cnt; - fexit_skel = fexit_sleep__open_and_load(); + fexit_skel = fexit_sleep_lskel__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto cleanup; - err = fexit_sleep__attach(fexit_skel); + err = fexit_sleep_lskel__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto cleanup; @@ -60,7 +60,7 @@ void test_fexit_sleep(void) */ close(fexit_skel->progs.nanosleep_fentry.prog_fd); close(fexit_skel->progs.nanosleep_fexit.prog_fd); - fexit_sleep__detach(fexit_skel); + fexit_sleep_lskel__detach(fexit_skel); /* kill the thread to unwind sys_nanosleep stack through the trampoline */ kill(cpid, 9); @@ -78,5 +78,5 @@ void test_fexit_sleep(void) goto cleanup; cleanup: - fexit_sleep__destroy(fexit_skel); + fexit_sleep_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index af3dba7267017..d4887d8bb3968 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -3,19 +3,19 @@ #include #include "fexit_test.lskel.h" -static int fexit_test(struct fexit_test *fexit_skel) +static int fexit_test(struct fexit_test_lskel *fexit_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fexit_test__attach(fexit_skel); + err = fexit_test_lskel__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) return err; /* Check that already linked program can't be attached again. 
*/ - link_fd = fexit_test__test1__attach(fexit_skel); + link_fd = fexit_test_lskel__test1__attach(fexit_skel); if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fexit_test(struct fexit_test *fexit_skel) return -1; } - fexit_test__detach(fexit_skel); + fexit_test_lskel__detach(fexit_skel); /* zero results for re-attach test */ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss)); @@ -40,10 +40,10 @@ static int fexit_test(struct fexit_test *fexit_skel) void test_fexit_test(void) { - struct fexit_test *fexit_skel = NULL; + struct fexit_test_lskel *fexit_skel = NULL; int err; - fexit_skel = fexit_test__open_and_load(); + fexit_skel = fexit_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fexit_test(void) ASSERT_OK(err, "fexit_second_attach"); cleanup: - fexit_test__destroy(fexit_skel); + fexit_test_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 9611f2bc50dfa..5c9c0176991b7 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -7,10 +7,10 @@ static void test_main(void) { - struct kfunc_call_test *skel; + struct kfunc_call_test_lskel *skel; int prog_fd, retval, err; - skel = kfunc_call_test__open_and_load(); + skel = kfunc_call_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; @@ -26,7 +26,7 @@ static void test_main(void) ASSERT_OK(err, "bpf_prog_test_run(test2)"); ASSERT_EQ(retval, 3, "test2-retval"); - kfunc_call_test__destroy(skel); + kfunc_call_test_lskel__destroy(skel); } static void test_subprog(void) diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index cf3acfa5a91d5..79f6bd1e50d60 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -7,6 +7,7 @@ #include "test_ksyms_btf.skel.h" #include "test_ksyms_btf_null_check.skel.h" #include "test_ksyms_weak.skel.h" +#include "test_ksyms_weak.lskel.h" static int duration; @@ -89,11 +90,11 @@ static void test_weak_syms(void) int err; skel = test_ksyms_weak__open_and_load(); - if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load")) return; err = test_ksyms_weak__attach(skel); - if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_ksyms_weak__attach")) goto cleanup; /* trigger tracepoint */ @@ -109,6 +110,33 @@ static void test_weak_syms(void) test_ksyms_weak__destroy(skel); } +static void test_weak_syms_lskel(void) +{ + struct test_ksyms_weak_lskel *skel; + struct test_ksyms_weak_lskel__data *data; + int err; + + skel = test_ksyms_weak_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load")) + return; + + err = test_ksyms_weak_lskel__attach(skel); + if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym"); + ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym"); + ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym"); + ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym"); + +cleanup: + test_ksyms_weak_lskel__destroy(skel); +} + void test_ksyms_btf(void) 
{ int percpu_datasec; @@ -136,4 +164,7 @@ void test_ksyms_btf(void) if (test__start_subtest("weak_ksyms")) test_weak_syms(); + + if (test__start_subtest("weak_ksyms_lskel")) + test_weak_syms_lskel(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 831447878d7b0..d490ad80eccb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -4,10 +4,11 @@ #include #include #include "test_ksyms_module.lskel.h" +#include "test_ksyms_module.skel.h" -void test_ksyms_module(void) +void test_ksyms_module_lskel(void) { - struct test_ksyms_module *skel; + struct test_ksyms_module_lskel *skel; int retval; int err; @@ -16,8 +17,8 @@ void test_ksyms_module(void) return; } - skel = test_ksyms_module__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open_and_load")) + skel = test_ksyms_module_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load")) return; err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); @@ -25,6 +26,37 @@ void test_ksyms_module(void) goto cleanup; ASSERT_EQ(retval, 0, "retval"); ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); +cleanup: + test_ksyms_module_lskel__destroy(skel); +} + +void test_ksyms_module_libbpf(void) +{ + struct test_ksyms_module *skel; + int retval, err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + skel = test_ksyms_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) + return; + err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, + sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module__destroy(skel); } + +void test_ksyms_module(void) +{ + if (test__start_subtest("lskel")) + test_ksyms_module_lskel(); + if (test__start_subtest("libbpf")) + test_ksyms_module_libbpf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c deleted file mode 100644 index e6343ef63af96..0000000000000 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include "test_ksyms_module.skel.h" - -void test_ksyms_module_libbpf(void) -{ - struct test_ksyms_module *skel; - int retval, err; - - if (!env.has_testmod) { - test__skip(); - return; - } - - skel = test_ksyms_module__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) - return; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, - sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); - if (!ASSERT_OK(err, "bpf_prog_test_run")) - goto cleanup; - ASSERT_EQ(retval, 0, "retval"); - ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); -cleanup: - test_ksyms_module__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4706cee843607..9a80fe8a6427c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -58,7 +58,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } -static struct test_ringbuf *skel; +static struct 
test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; static void trigger_samples() @@ -90,13 +90,13 @@ void test_ringbuf(void) int page_size = getpagesize(); void *mmap_ptr, *tmp_ptr; - skel = test_ringbuf__open(); + skel = test_ringbuf_lskel__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; skel->maps.ringbuf.max_entries = page_size; - err = test_ringbuf__load(skel); + err = test_ringbuf_lskel__load(skel); if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; @@ -154,7 +154,7 @@ void test_ringbuf(void) if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; - err = test_ringbuf__attach(skel); + err = test_ringbuf_lskel__attach(skel); if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) goto cleanup; @@ -292,8 +292,8 @@ void test_ringbuf(void) CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", 1L, skel->bss->discarded); - test_ringbuf__detach(skel); + test_ringbuf_lskel__detach(skel); cleanup: ring_buffer__free(ringbuf); - test_ringbuf__destroy(skel); + test_ringbuf_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index 3f7a7141265ee..cade7f12315f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -10,27 +10,27 @@ void serial_test_trace_printk(void) { + struct trace_printk_lskel__bss *bss; int err = 0, iter = 0, found = 0; - struct trace_printk__bss *bss; - struct trace_printk *skel; + struct trace_printk_lskel *skel; char *buf = NULL; FILE *fp = NULL; size_t buflen; - skel = trace_printk__open(); + skel = trace_printk_lskel__open(); if (!ASSERT_OK_PTR(skel, "trace_printk__open")) return; ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]"); skel->rodata->fmt[0] = 't'; - err = trace_printk__load(skel); + err = trace_printk_lskel__load(skel); if (!ASSERT_OK(err, "trace_printk__load")) goto cleanup; bss = skel->bss; - err = trace_printk__attach(skel); + err = trace_printk_lskel__attach(skel); if (!ASSERT_OK(err, "trace_printk__attach")) goto cleanup; @@ -43,7 +43,7 @@ void serial_test_trace_printk(void) /* wait for tracepoint to trigger */ usleep(1); - trace_printk__detach(skel); + trace_printk_lskel__detach(skel); if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran")) goto cleanup; @@ -65,7 +65,7 @@ void serial_test_trace_printk(void) goto cleanup; cleanup: - trace_printk__destroy(skel); + trace_printk_lskel__destroy(skel); free(buf); if (fp) fclose(fp); diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c index 46101270cb1ab..7a4e313e85584 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -10,20 +10,20 @@ void serial_test_trace_vprintk(void) { + struct trace_vprintk_lskel__bss *bss; int err = 0, iter = 0, found = 0; - struct trace_vprintk__bss *bss; - struct trace_vprintk *skel; + struct trace_vprintk_lskel *skel; char *buf = NULL; FILE *fp = NULL; size_t buflen; - skel = trace_vprintk__open_and_load(); + skel = trace_vprintk_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) goto cleanup; bss = skel->bss; - err = trace_vprintk__attach(skel); + err = trace_vprintk_lskel__attach(skel); if (!ASSERT_OK(err, "trace_vprintk__attach")) goto cleanup; @@ -36,7 +36,7 @@ void 
serial_test_trace_vprintk(void) /* wait for tracepoint to trigger */ usleep(1); - trace_vprintk__detach(skel); + trace_vprintk_lskel__detach(skel); if (!ASSERT_GT(bss->trace_vprintk_ran, 0, "bss->trace_vprintk_ran")) goto cleanup; @@ -61,7 +61,7 @@ void serial_test_trace_vprintk(void) goto cleanup; cleanup: - trace_vprintk__destroy(skel); + trace_vprintk_lskel__destroy(skel); free(buf); if (fp) fclose(fp); diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c index b4bae1340cf1a..a47e7c0e1ffd8 100644 --- a/tools/testing/selftests/bpf/prog_tests/verif_stats.c +++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c @@ -8,11 +8,11 @@ void test_verif_stats(void) { __u32 len = sizeof(struct bpf_prog_info); + struct trace_vprintk_lskel *skel; struct bpf_prog_info info = {}; - struct trace_vprintk *skel; int err; - skel = trace_vprintk__open_and_load(); + skel = trace_vprintk_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) goto cleanup; @@ -24,5 +24,5 @@ void test_verif_stats(void) goto cleanup; cleanup: - trace_vprintk__destroy(skel); + trace_vprintk_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c index 5f8379aadb296..8eadbd4caf7a7 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c @@ -38,7 +38,7 @@ int pass_handler(const void *ctx) /* tests existing symbols. */ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); if (rq) - out__existing_typed = rq->cpu; + out__existing_typed = 0; out__existing_typeless = (__u64)&bpf_prog_active; /* tests non-existent symbols. */ From c3fc706e94f5653def2783ffcd809a38676b7551 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:05:00 +0530 Subject: [PATCH 156/181] selftests/bpf: Fix fd cleanup in sk_lookup test Similar to the fix in commit: e31eec77e4ab ("bpf: selftests: Fix fd cleanup in get_branch_snapshot") We use designated initializer to set fds to -1 without breaking on future changes to MAX_SERVER constant denoting the array size. The particular close(0) occurs on non-reuseport tests, so it can be seen with -n 115/{2,3} but not 115/4. This can cause problems with future tests if they depend on BTF fd never being acquired as fd 0, breaking internal libbpf assumptions. Fixes: 0ab5539f8584 ("selftests/bpf: Tests for BPF_SK_LOOKUP attach point") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Sitnicki Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211028063501.2239335-8-memxor@gmail.com --- tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index aee41547e7f45..6db07401bc493 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -598,7 +598,7 @@ static void query_lookup_prog(struct test_sk_lookup *skel) static void run_lookup_prog(const struct test *t) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... 
MAX_SERVERS - 1] = -1 }; int client_fd, reuse_conn_fd = -1; struct bpf_link *lookup_link; int i, err; @@ -1053,7 +1053,7 @@ static void run_sk_assign(struct test_sk_lookup *skel, struct bpf_program *lookup_prog, const char *remote_ip, const char *local_ip) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 }; struct bpf_sk_lookup ctx; __u64 server_cookie; int i, err; From efadf2ad17a2d5dc90bda4e6e8b2f96af4c62dae Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:05:01 +0530 Subject: [PATCH 157/181] selftests/bpf: Fix memory leak in test_ima The allocated ring buffer is never freed, do so in the cleanup path. Fixes: f446b570ac7e ("bpf/selftests: Update the IMA test to use BPF ring buffer") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211028063501.2239335-9-memxor@gmail.com --- tools/testing/selftests/bpf/prog_tests/test_ima.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index 0252f61d611a9..97d8a6f84f4ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -43,7 +43,7 @@ static int process_sample(void *ctx, void *data, size_t len) void test_test_ima(void) { char measured_dir_template[] = "/tmp/ima_measuredXXXXXX"; - struct ring_buffer *ringbuf; + struct ring_buffer *ringbuf = NULL; const char *measured_dir; char cmd[256]; @@ -85,5 +85,6 @@ void test_test_ima(void) err = system(cmd); CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno); close_prog: + ring_buffer__free(ringbuf); ima__destroy(skel); } From f48ad69097fe79d1de13c4d8fef556d4c11c5e68 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 26 Oct 2021 16:34:09 +0200 Subject: [PATCH 158/181] selftests/bpf: Fix fclose/pclose mismatch in test_progs Make sure to use pclose() to properly close the pipe opened by popen(). Fixes: 81f77fd0deeb ("bpf: add selftest for stackmap with BPF_F_STACK_BUILD_ID") Signed-off-by: Andrea Righi Signed-off-by: Daniel Borkmann Reviewed-by: Shuah Khan Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211026143409.42666-1-andrea.righi@canonical.com --- tools/testing/selftests/bpf/test_progs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 1f4a485669917..c65986bd9d070 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -382,7 +382,7 @@ int extract_build_id(char *build_id, size_t size) if (getline(&line, &len, fp) == -1) goto err; - fclose(fp); + pclose(fp); if (len > size) len = size; @@ -391,7 +391,7 @@ int extract_build_id(char *build_id, size_t size) free(line); return 0; err: - fclose(fp); + pclose(fp); return -1; } From 8a03e56b253e9691c90bc52ca199323d71b96204 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 29 Oct 2021 12:43:54 -0700 Subject: [PATCH 159/181] bpf: Disallow unprivileged bpf by default Disabling unprivileged BPF would help prevent unprivileged users from creating certain conditions required for potential speculative execution side-channel attacks on unmitigated affected hardware. A deep dive on such attacks and current mitigations is available here [0]. 
Sync with what many distros are currently applying already, and disable unprivileged BPF by default. An admin can enable this at runtime, if necessary, as described in 08389d888287 ("bpf: Add kconfig knob for disabling unpriv bpf by default"). [0] "BPF and Spectre: Mitigating transient execution attacks", Daniel Borkmann, eBPF Summit '21 https://ebpf.io/summit-2021-slides/eBPF_Summit_2021-Keynote-Daniel_Borkmann-BPF_and_Spectre.pdf Signed-off-by: Pawan Gupta Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Acked-by: Mark Rutland Link: https://lore.kernel.org/bpf/0ace9ce3f97656d5f62d11093ad7ee81190c3c25.1635535215.git.pawan.kumar.gupta@linux.intel.com --- kernel/bpf/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index a82d6de865227..d24d518ddd634 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -64,6 +64,7 @@ config BPF_JIT_DEFAULT_ON config BPF_UNPRIV_DEFAULT_OFF bool "Disable unprivileged BPF by default" + default y depends on BPF_SYSCALL help Disables unprivileged BPF by default by setting the corresponding @@ -72,6 +73,12 @@ config BPF_UNPRIV_DEFAULT_OFF disable it by setting it to 1 (from which no other transition to 0 is possible anymore). + Unprivileged BPF could be used to exploit certain potential + speculative execution side-channel vulnerabilities on unmitigated + affected hardware. + + If you are unsure how to answer this question, answer Y. + source "kernel/bpf/preload/Kconfig" config BPF_LSM From 0133c20480b14820d43c37c0e9502da4bffcad3a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 29 Oct 2021 11:29:07 -0700 Subject: [PATCH 160/181] selftests/bpf: Fix strobemeta selftest regression After most recent nightly Clang update strobemeta selftests started failing with the following error (relevant portion of assembly included): 1624: (85) call bpf_probe_read_user_str#114 1625: (bf) r1 = r0 1626: (18) r2 = 0xfffffffe 1628: (5f) r1 &= r2 1629: (55) if r1 != 0x0 goto pc+7 1630: (07) r9 += 104 1631: (6b) *(u16 *)(r9 +0) = r0 1632: (67) r0 <<= 32 1633: (77) r0 >>= 32 1634: (79) r1 = *(u64 *)(r10 -456) 1635: (0f) r1 += r0 1636: (7b) *(u64 *)(r10 -456) = r1 1637: (79) r1 = *(u64 *)(r10 -368) 1638: (c5) if r1 s< 0x1 goto pc+778 1639: (bf) r6 = r8 1640: (0f) r6 += r7 1641: (b4) w1 = 0 1642: (6b) *(u16 *)(r6 +108) = r1 1643: (79) r3 = *(u64 *)(r10 -352) 1644: (79) r9 = *(u64 *)(r10 -456) 1645: (bf) r1 = r9 1646: (b4) w2 = 1 1647: (85) call bpf_probe_read_user_str#114 R1 unbounded memory access, make sure to bounds check any such access In the above code r0 and r1 are implicitly related. Clang knows that, but verifier isn't able to infer this relationship. Yonghong Song narrowed down this "regression" in code generation to a recent Clang optimization change ([0]), which for BPF target generates code pattern that BPF verifier can't handle and loses track of register boundaries. This patch works around the issue by adding an BPF assembly-based helper that helps to prove to the verifier that upper bound of the register is a given constant by controlling the exact share of generated BPF instruction sequence. This fixes the immediate issue for strobemeta selftest. 
[0] https://github.com/llvm/llvm-project/commit/acabad9ff6bf13e00305d9d8621ee8eafc1f8b08 Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211029182907.166910-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/strobemeta.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 7de534f38c3f1..3687ea755ab5a 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -10,6 +10,14 @@ #include #include +#define bpf_clamp_umax(VAR, UMAX) \ + asm volatile ( \ + "if %0 <= %[max] goto +1\n" \ + "%0 = %[max]\n" \ + : "+r"(VAR) \ + : [max]"i"(UMAX) \ + ) + typedef uint32_t pid_t; struct task_struct {}; @@ -413,6 +421,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { + bpf_clamp_umax(len, STROBE_MAX_STR_LEN); descr->tag_len = len; payload += len; } @@ -430,6 +439,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { + bpf_clamp_umax(len, STROBE_MAX_STR_LEN); descr->key_lens[i] = len; payload += len; } @@ -437,6 +447,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { + bpf_clamp_umax(len, STROBE_MAX_STR_LEN); descr->val_lens[i] = len; payload += len; } From 7303524e04af49a47991e19f895c3b8cdc3796c7 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 29 Oct 2021 22:12:14 +0800 Subject: [PATCH 161/181] skmsg: Lose offset info in sk_psock_skb_ingress If sockmap enable strparser, there are lose offset info in sk_psock_skb_ingress(). If the length determined by parse_msg function is not skb->len, the skb will be converted to sk_msg multiple times, and userspace app will get the data multiple times. Fix this by get the offset and length from strp_msg. And as Cong suggested, add one bit in skb->_sk_redir to distinguish enable or disable strparser. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211029141216.211899-1-liujian56@huawei.com --- include/linux/skmsg.h | 18 ++++++++++++++++-- net/core/skmsg.c | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14ab0c0bc9241..94e2a1f6e58db 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -508,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) #if IS_ENABLED(CONFIG_NET_SOCK_MSG) -/* We only have one bit so far. */ -#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) +#define BPF_F_STRPARSER (1UL << 1) + +/* We only have two bits so far. 
*/ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) + +static inline bool skb_bpf_strparser(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_STRPARSER; +} + +static inline void skb_bpf_set_strparser(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_STRPARSER; +} static inline bool skb_bpf_ingress(const struct sk_buff *skb) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2d6249b289284..9701a1404ccb2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -494,6 +494,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, + u32 off, u32 len, struct sk_psock *psock, struct sock *sk, struct sk_msg *msg) @@ -507,11 +508,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, */ if (skb_linearize(skb)) return -EAGAIN; - num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); + num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (unlikely(num_sge < 0)) return num_sge; - copied = skb->len; + copied = len; msg->sg.start = 0; msg->sg.size = copied; msg->sg.end = num_sge; @@ -522,9 +523,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return copied; } -static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len); -static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len) { struct sock *sk = psock->sk; struct sk_msg *msg; @@ -535,7 +538,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * correctly. */ if (unlikely(skb->sk == sk)) - return sk_psock_skb_ingress_self(psock, skb); + return sk_psock_skb_ingress_self(psock, skb, off, len); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; @@ -547,7 +550,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. */ skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; @@ -557,7 +560,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * skb. In this case we do not need to check memory limits or skb_set_owner_r * because the skb is already accounted for here. 
*/ -static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len) { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; @@ -567,7 +571,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; @@ -581,7 +585,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return -EAGAIN; return skb_send_sock(psock->sk, skb, off, len); } - return sk_psock_skb_ingress(psock, skb); + return sk_psock_skb_ingress(psock, skb, off, len); } static void sk_psock_skb_state(struct sk_psock *psock, @@ -624,6 +628,12 @@ static void sk_psock_backlog(struct work_struct *work) while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; off = 0; + if (skb_bpf_strparser(skb)) { + struct strp_msg *stm = strp_msg(skb); + + off = stm->offset; + len = stm->full_len; + } start: ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); @@ -863,6 +873,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) * return code, but then didn't set a redirect interface. */ if (unlikely(!sk_other)) { + skb_bpf_redirect_clear(skb); sock_drop(from->sk, skb); return -EIO; } @@ -930,6 +941,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, { struct sock *sk_other; int err = 0; + u32 len, off; switch (verdict) { case __SK_PASS: @@ -937,6 +949,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, sk_other = psock->sk; if (sock_flag(sk_other, SOCK_DEAD) || !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + skb_bpf_redirect_clear(skb); goto out_free; } @@ -949,7 +962,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, * retrying later from workqueue. */ if (skb_queue_empty(&psock->ingress_skb)) { - err = sk_psock_skb_ingress_self(psock, skb); + len = skb->len; + off = 0; + if (skb_bpf_strparser(skb)) { + struct strp_msg *stm = strp_msg(skb); + + off = stm->offset; + len = stm->full_len; + } + err = sk_psock_skb_ingress_self(psock, skb, off, len); } if (err < 0) { spin_lock_bh(&psock->ingress_lock); @@ -1015,6 +1036,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); + if (ret == SK_PASS) + skb_bpf_set_strparser(skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } From b556c3fd467628341cc7680e4271790cafd79dc4 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 29 Oct 2021 22:12:15 +0800 Subject: [PATCH 162/181] selftests, bpf: Fix test_txmsg_ingress_parser error After "skmsg: lose offset info in sk_psock_skb_ingress", the test case with ktls failed. This is because the ktls parser (tls_read_size) returns 285, not 256. The case looks like this: tls_sk1 --> redir_sk --> tls_sk2 tls_sk1 sent out 512 bytes of data; after TLS-related processing, redir_sk received 570 bytes of data and redirected 512 (skb_use_parser) bytes of data to tls_sk2, but tls_sk2 needs 285 * 2 bytes of data, so a receive timeout occurred.
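(Editorial note: assuming the test's ktls sockets use TLS 1.2 with AES-GCM-128, each 256-byte chunk handed to the parser gains roughly 29 bytes of record overhead (5-byte header + 8-byte explicit nonce + 16-byte tag), i.e. 256 + 29 = 285 bytes per record, so the 512 bytes sent by tls_sk1 show up at redir_sk as 2 * 285 = 570 bytes; that is why skb_use_parser is bumped to 570 for the ktls case in the hunk below.)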
Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211029141216.211899-2-liujian56@huawei.com --- tools/testing/selftests/bpf/test_sockmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index eefd445b96fc7..06924917ad77f 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1680,6 +1680,8 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; skb_use_parser = 512; + if (ktls == 1) + skb_use_parser = 570; opt->iov_length = 256; opt->iov_count = 1; opt->rate = 2; From d69672147faa2a7671c0779fa5b9ad99e4fca4e3 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 29 Oct 2021 22:12:16 +0800 Subject: [PATCH 163/181] selftests, bpf: Add one test for sockmap with strparser Add the test to check sockmap with strparser is working well. Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211029141216.211899-3-liujian56@huawei.com --- tools/testing/selftests/bpf/test_sockmap.c | 33 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 06924917ad77f..1ba7e7346afb2 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -139,6 +139,7 @@ struct sockmap_options { bool sendpage; bool data_test; bool drop_expected; + bool check_recved_len; int iov_count; int iov_length; int rate; @@ -556,8 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, int err, i, flags = MSG_NOSIGNAL; bool drop = opt->drop_expected; bool data = opt->data_test; + int iov_alloc_length = iov_length; - err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); + if (!tx && opt->check_recved_len) + iov_alloc_length *= 2; + + err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx); if (err) goto out_errno; if (peek_flag) { @@ -665,6 +670,13 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; + if (opt->check_recved_len && s->bytes_recvd > total_bytes) { + errno = EMSGSIZE; + fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n", + s->bytes_recvd, total_bytes); + goto out_errno; + } + if (data) { int chunk_sz = opt->sendpage ? 
iov_length * cnt : @@ -744,7 +756,8 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - iov_buf -= (txmsg_pop - txmsg_start_pop + 1); + if (txmsg_pop || txmsg_start_pop) + iov_buf -= (txmsg_pop - txmsg_start_pop + 1); if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); @@ -1688,6 +1701,19 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) test_exec(cgrp, opt); } +static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt) +{ + if (ktls == 1) + return; + skb_use_parser = 10; + opt->iov_length = 20; + opt->iov_count = 1; + opt->rate = 1; + opt->check_recved_len = true; + test_exec(cgrp, opt); + opt->check_recved_len = false; +} + char *map_names[] = { "sock_map", "sock_map_txmsg", @@ -1786,7 +1812,8 @@ struct _test test[] = { {"txmsg test pull-data", test_txmsg_pull}, {"txmsg test pop-data", test_txmsg_pop}, {"txmsg test push/pop data", test_txmsg_push_pop}, - {"txmsg text ingress parser", test_txmsg_ingress_parser}, + {"txmsg test ingress parser", test_txmsg_ingress_parser}, + {"txmsg test ingress parser2", test_txmsg_ingress_parser2}, }; static int check_whitelist(struct _test *t, struct sockmap_options *opt) From 4b54214f39ff24a7e9b7231ae9c05d1c2c424280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 28 Oct 2021 18:10:54 +0200 Subject: [PATCH 164/181] riscv, bpf: Increase the maximum number of iterations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that BPF programs can be up to 1M instructions, it is not uncommon that a program requires more than the current 16 iterations to converge. Bump it to 32, which is enough for selftests/bpf, and test_bpf.ko. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211028161057.520552-2-bjorn@kernel.org --- arch/riscv/net/bpf_jit_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 7ccc809f2c192..ef9fcf6ea749f 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -11,7 +11,7 @@ #include "bpf_jit.h" /* Number of iterations to try until offsets converge. */ -#define NR_JIT_ITERATIONS 16 +#define NR_JIT_ITERATIONS 32 static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) { From b390d69831ee30e10a4ef410bee157e73b149036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 28 Oct 2021 18:10:55 +0200 Subject: [PATCH 165/181] tools, build: Add RISC-V to HOSTARCH parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RISC-V to the HOSTARCH parsing, so that ARCH is "riscv", and not "riscv32" or "riscv64". This affects the perf and libbpf builds, so that arch specific includes are correctly picked up for RISC-V. 
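For example (editorial illustration), on a host where uname -m reports riscv64, HOSTARCH and hence the default ARCH now resolve to riscv, matching the riscv-named arch directories that the perf and libbpf builds look for, rather than a literal riscv64.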
Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211028161057.520552-3-bjorn@kernel.org --- tools/scripts/Makefile.arch | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index b10b7a27c33fd..0c6c7f4568878 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,7 +4,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ + -e s/riscv.*/riscv/) ifndef ARCH ARCH := $(HOSTARCH) From 589fed479ba1e93f94d9772aa6162cd81f7e491c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 28 Oct 2021 18:10:56 +0200 Subject: [PATCH 166/181] riscv, libbpf: Add RISC-V (RV64) support to bpf_tracing.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add macros for 64-bit RISC-V PT_REGS to bpf_tracing.h. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211028161057.520552-4-bjorn@kernel.org --- tools/lib/bpf/bpf_tracing.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index d6bfbe009296c..db05a59371056 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -24,6 +24,9 @@ #elif defined(__TARGET_ARCH_sparc) #define bpf_target_sparc #define bpf_target_defined +#elif defined(__TARGET_ARCH_riscv) + #define bpf_target_riscv + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -48,6 +51,9 @@ #elif defined(__sparc__) #define bpf_target_sparc #define bpf_target_defined +#elif defined(__riscv) && __riscv_xlen == 64 + #define bpf_target_riscv + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -288,6 +294,32 @@ struct pt_regs; #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif +#elif defined(bpf_target_riscv) + +struct pt_regs; +#define PT_REGS_RV const volatile struct user_regs_struct +#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) +#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) +#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) +#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) +#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) +#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) +#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) +#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) +#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) + #endif #if defined(bpf_target_powerpc) From 36e70b9b06bf14a0fac87315f0e73d6e17e80aad Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 28 Oct 2021 18:10:57 +0200 Subject: [PATCH 167/181] selftests, bpf: Fix broken riscv build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is closely related to commit 6016df8fe874 ("selftests/bpf: Fix broken riscv build"). When clang includes the system include directories, but targeting BPF program, __BITS_PER_LONG defaults to 32, unless explicitly set. Work around this problem, by explicitly setting __BITS_PER_LONG to __riscv_xlen. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211028161057.520552-5-bjorn@kernel.org --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f49cb5fc85af8..54b0a41a37750 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -277,7 +277,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) -v -E - &1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - Date: Mon, 25 Oct 2021 14:40:22 +0800 Subject: [PATCH 168/181] bpf: Factor out a helper to prepare trampoline for struct_ops prog Factor out a helper bpf_struct_ops_prepare_trampoline() to prepare trampoline for BPF_PROG_TYPE_STRUCT_OPS prog. It will be used by .test_run callback in following patch. Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211025064025.2567443-2-houtao1@huawei.com --- include/linux/bpf.h | 4 ++++ kernel/bpf/bpf_struct_ops.c | 29 +++++++++++++++++++---------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6deebf8bf78fb..aabd3540aaaf9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1000,6 +1000,10 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, + struct bpf_prog *prog, + const struct btf_func_model *model, + void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 9abcc33f02cf0..44be101f25621 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -312,6 +312,20 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, + struct bpf_prog *prog, + const struct btf_func_model *model, + void *image, void *image_end) +{ + u32 flags; + + tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; + tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = model->ret_size > 0 ? 
BPF_TRAMP_F_RET_FENTRY_RET : 0; + return arch_prepare_bpf_trampoline(NULL, image, image_end, + model, flags, tprogs, NULL); +} + static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -323,7 +337,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_tramp_progs *tprogs = NULL; void *udata, *kdata; int prog_fd, err = 0; - void *image; + void *image, *image_end; u32 i; if (flags) @@ -363,12 +377,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, udata = &uvalue->data; kdata = &kvalue->data; image = st_map->image; + image_end = st_map->image + PAGE_SIZE; for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; - u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -430,14 +444,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto reset_unlock; } - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; - flags = st_ops->func_models[i].ret_size > 0 ? - BPF_TRAMP_F_RET_FENTRY_RET : 0; - err = arch_prepare_bpf_trampoline(NULL, image, - st_map->image + PAGE_SIZE, - &st_ops->func_models[i], - flags, tprogs, NULL); + err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + &st_ops->func_models[i], + image, image_end); if (err < 0) goto reset_unlock; From 35346ab64132d0f5919b06932d708c0d10360553 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 25 Oct 2021 14:40:23 +0800 Subject: [PATCH 169/181] bpf: Factor out helpers for ctx access checking Factor out two helpers to check the read access of ctx for raw tp and BTF function. bpf_tracing_ctx_access() is used to check the read access to argument is valid, and bpf_tracing_btf_ctx_access() checks whether the btf type of argument is valid besides the checking of argument read. bpf_tracing_btf_ctx_access() will be used by the following patch. 
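For illustration only (not part of this patch), a minimal sketch of how a program type's is_valid_access callback can sit on top of the new helper, mirroring the raw_tp/tracing and bpf_tcp_ca conversions in the diff below; the dummy_prog_is_valid_access name is hypothetical:

    static bool dummy_prog_is_valid_access(int off, int size,
                                           enum bpf_access_type type,
                                           const struct bpf_prog *prog,
                                           struct bpf_insn_access_aux *info)
    {
            /* bpf_tracing_btf_ctx_access() rejects writes, out-of-range and
             * misaligned ctx offsets, then defers the BTF-based type check
             * to btf_ctx_access().
             */
            return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
    }
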
Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211025064025.2567443-3-houtao1@huawei.com --- include/linux/bpf.h | 23 +++++++++++++++++++++++ kernel/trace/bpf_trace.c | 16 ++-------------- net/ipv4/bpf_tcp_ca.c | 9 +-------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aabd3540aaaf9..67f71e7def56d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1650,6 +1650,29 @@ bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); + +static inline bool bpf_tracing_ctx_access(int off, int size, + enum bpf_access_type type) +{ + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; +} + +static inline bool bpf_tracing_btf_ctx_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (!bpf_tracing_ctx_access(off, size, type)) + return false; + return btf_ctx_access(off, size, type, prog, info); +} + int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cbcd0d6fca7c7..7396488793ff7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1646,13 +1646,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - return true; + return bpf_tracing_ctx_access(off, size, type); } static bool tracing_prog_is_valid_access(int off, int size, @@ -1660,13 +1654,7 @@ static bool tracing_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - return btf_ctx_access(off, size, type, prog, info); + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); } int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 57709ac09fb2b..2cf02b4d77fbf 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -81,14 +81,7 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - - if (!btf_ctx_access(off, size, type, prog, info)) + if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info)) return false; if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id) From c196906d50e360d82ed9aa5596a9d0ce89b7ab78 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 25 Oct 2021 14:40:24 +0800 Subject: [PATCH 170/181] bpf: Add dummy BPF STRUCT_OPS for test purpose Currently the test of BPF STRUCT_OPS depends on the specific bpf implementation of tcp_congestion_ops, but it can not cover all basic functionalities (e.g, return value handling), so introduce a 
dummy BPF STRUCT_OPS for test purpose. Loading a bpf_dummy_ops implementation from userspace is prohibited, and its only purpose is to run BPF_PROG_TYPE_STRUCT_OPS program through bpf(BPF_PROG_TEST_RUN). Now programs for test_1() & test_2() are supported. The following three cases are exercised in bpf_dummy_struct_ops_test_run(): (1) test and check the value returned from state arg in test_1(state) The content of state is copied from userspace pointer and copied back after calling test_1(state). The user pointer is saved in an u64 array and the array address is passed through ctx_in. (2) test and check the return value of test_1(NULL) Just simulate the case in which an invalid input argument is passed in. (3) test multiple arguments passing in test_2(state, ...) 5 arguments are passed through ctx_in in form of u64 array. The first element of array is userspace pointer of state and others 4 arguments follow. Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211025064025.2567443-4-houtao1@huawei.com --- include/linux/bpf.h | 16 +++ kernel/bpf/bpf_struct_ops.c | 3 + kernel/bpf/bpf_struct_ops_types.h | 3 + net/bpf/Makefile | 3 + net/bpf/bpf_dummy_struct_ops.c | 200 ++++++++++++++++++++++++++++++ 5 files changed, 225 insertions(+) create mode 100644 net/bpf/bpf_dummy_struct_ops.c diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 67f71e7def56d..c098089c1b54d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1018,6 +1018,22 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } + +#ifdef CONFIG_NET +/* Define it here to avoid the use of forward declaration */ +struct bpf_dummy_ops_state { + int val; +}; + +struct bpf_dummy_ops { + int (*test_1)(struct bpf_dummy_ops_state *cb); + int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, + char a3, unsigned long a4); +}; + +int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +#endif #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 44be101f25621..8ecfe47527697 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -93,6 +93,9 @@ const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { }; const struct bpf_prog_ops bpf_struct_ops_prog_ops = { +#ifdef CONFIG_NET + .test_run = bpf_struct_ops_test_run, +#endif }; static const struct btf_type *module_type; diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h index 066d83ea1c99c..5678a9ddf8178 100644 --- a/kernel/bpf/bpf_struct_ops_types.h +++ b/kernel/bpf/bpf_struct_ops_types.h @@ -2,6 +2,9 @@ /* internal file - do not include directly */ #ifdef CONFIG_BPF_JIT +#ifdef CONFIG_NET +BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) +#endif #ifdef CONFIG_INET #include BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) diff --git a/net/bpf/Makefile b/net/bpf/Makefile index 1c0a98d8c28f0..1ebe270bde238 100644 --- a/net/bpf/Makefile +++ b/net/bpf/Makefile @@ -1,2 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_BPF_SYSCALL) := test_run.o +ifeq ($(CONFIG_BPF_JIT),y) +obj-$(CONFIG_BPF_SYSCALL) += bpf_dummy_struct_ops.o +endif diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c new file mode 100644 index 0000000000000..fbc896323bec9 --- /dev/null +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -0,0 +1,200 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021. Huawei Technologies Co., Ltd + */ +#include +#include +#include +#include + +extern struct bpf_struct_ops bpf_bpf_dummy_ops; + +/* A common type for test_N with return value in bpf_dummy_ops */ +typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...); + +struct bpf_dummy_ops_test_args { + u64 args[MAX_BPF_FUNC_ARGS]; + struct bpf_dummy_ops_state state; +}; + +static struct bpf_dummy_ops_test_args * +dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr) +{ + __u32 size_in; + struct bpf_dummy_ops_test_args *args; + void __user *ctx_in; + void __user *u_state; + + size_in = kattr->test.ctx_size_in; + if (size_in != sizeof(u64) * nr) + return ERR_PTR(-EINVAL); + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return ERR_PTR(-ENOMEM); + + ctx_in = u64_to_user_ptr(kattr->test.ctx_in); + if (copy_from_user(args->args, ctx_in, size_in)) + goto out; + + /* args[0] is 0 means state argument of test_N will be NULL */ + u_state = u64_to_user_ptr(args->args[0]); + if (u_state && copy_from_user(&args->state, u_state, + sizeof(args->state))) + goto out; + + return args; +out: + kfree(args); + return ERR_PTR(-EFAULT); +} + +static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args) +{ + void __user *u_state; + + u_state = u64_to_user_ptr(args->args[0]); + if (u_state && copy_to_user(u_state, &args->state, sizeof(args->state))) + return -EFAULT; + + return 0; +} + +static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) +{ + dummy_ops_test_ret_fn test = (void *)image; + struct bpf_dummy_ops_state *state = NULL; + + /* state needs to be NULL if args[0] is 0 */ + if (args->args[0]) + state = &args->state; + return test(state, args->args[1], args->args[2], + args->args[3], args->args[4]); +} + +int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; + const struct btf_type *func_proto; + struct bpf_dummy_ops_test_args *args; + struct bpf_tramp_progs *tprogs; + void *image = NULL; + unsigned int op_idx; + int prog_ret; + int err; + + if (prog->aux->attach_btf_id != st_ops->type_id) + return -EOPNOTSUPP; + + func_proto = prog->aux->attach_func_proto; + args = dummy_ops_init_args(kattr, btf_type_vlen(func_proto)); + if (IS_ERR(args)) + return PTR_ERR(args); + + tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); + if (!tprogs) { + err = -ENOMEM; + goto out; + } + + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) { + err = -ENOMEM; + goto out; + } + set_vm_flush_reset_perms(image); + + op_idx = prog->expected_attach_type; + err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + &st_ops->func_models[op_idx], + image, image + PAGE_SIZE); + if (err < 0) + goto out; + + set_memory_ro((long)image, 1); + set_memory_x((long)image, 1); + prog_ret = dummy_ops_call_op(image, args); + + err = dummy_ops_copy_args(args); + if (err) + goto out; + if (put_user(prog_ret, &uattr->test.retval)) + err = -EFAULT; +out: + kfree(args); + bpf_jit_free_exec(image); + kfree(tprogs); + return err; +} + +static int bpf_dummy_init(struct btf *btf) +{ + return 0; +} + +static bool bpf_dummy_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, + 
const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id) +{ + const struct btf_type *state; + s32 type_id; + int err; + + type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state", + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + + state = btf_type_by_id(btf, type_id); + if (t != state) { + bpf_log(log, "only access to bpf_dummy_ops_state is supported\n"); + return -EACCES; + } + + err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + if (err < 0) + return err; + + return atype == BPF_READ ? err : NOT_INIT; +} + +static const struct bpf_verifier_ops bpf_dummy_verifier_ops = { + .is_valid_access = bpf_dummy_ops_is_valid_access, + .btf_struct_access = bpf_dummy_ops_btf_struct_access, +}; + +static int bpf_dummy_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return -EOPNOTSUPP; +} + +static int bpf_dummy_reg(void *kdata) +{ + return -EOPNOTSUPP; +} + +static void bpf_dummy_unreg(void *kdata) +{ +} + +struct bpf_struct_ops bpf_bpf_dummy_ops = { + .verifier_ops = &bpf_dummy_verifier_ops, + .init = bpf_dummy_init, + .init_member = bpf_dummy_init_member, + .reg = bpf_dummy_reg, + .unreg = bpf_dummy_unreg, + .name = "bpf_dummy_ops", +}; From 31122b2f768bde5281037141bb658f117bea102e Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 25 Oct 2021 14:40:25 +0800 Subject: [PATCH 171/181] selftests/bpf: Add test cases for struct_ops prog Running a BPF_PROG_TYPE_STRUCT_OPS prog for dummy_st_ops::test_N() through bpf_prog_test_run(). Four test cases are added: (1) attach dummy_st_ops should fail (2) function return value of bpf_dummy_ops::test_1() is expected (3) pointer argument of bpf_dummy_ops::test_1() works as expected (4) multiple arguments passed to bpf_dummy_ops::test_2() are correct Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211025064025.2567443-5-houtao1@huawei.com --- .../selftests/bpf/prog_tests/dummy_st_ops.c | 115 ++++++++++++++++++ .../selftests/bpf/progs/dummy_st_ops.c | 50 ++++++++ 2 files changed, 165 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c create mode 100644 tools/testing/selftests/bpf/progs/dummy_st_ops.c diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c new file mode 100644 index 0000000000000..cbaa44ffb8c6e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. 
Huawei Technologies Co., Ltd */ +#include +#include "dummy_st_ops.skel.h" + +/* Need to keep consistent with definition in include/linux/bpf.h */ +struct bpf_dummy_ops_state { + int val; +}; + +static void test_dummy_st_ops_attach(void) +{ + struct dummy_st_ops *skel; + struct bpf_link *link; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.dummy_1); + ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_init_ret_value(void) +{ + __u64 args[1] = {0}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_1); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_init_ptr_arg(void) +{ + int exp_retval = 0xbeef; + struct bpf_dummy_ops_state in_state = { + .val = exp_retval, + }; + __u64 args[1] = {(unsigned long)&in_state}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_1); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); + ASSERT_EQ(attr.retval, exp_retval, "test_ret"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_multiple_args(void) +{ + __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + size_t i; + char name[8]; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_2); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + for (i = 0; i < ARRAY_SIZE(args); i++) { + snprintf(name, sizeof(name), "arg %zu", i); + ASSERT_EQ(skel->bss->test_2_args[i], args[i], name); + } + + dummy_st_ops__destroy(skel); +} + +void test_dummy_st_ops(void) +{ + if (test__start_subtest("dummy_st_ops_attach")) + test_dummy_st_ops_attach(); + if (test__start_subtest("dummy_init_ret_value")) + test_dummy_init_ret_value(); + if (test__start_subtest("dummy_init_ptr_arg")) + test_dummy_init_ptr_arg(); + if (test__start_subtest("dummy_multiple_args")) + test_dummy_multiple_args(); +} diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops.c b/tools/testing/selftests/bpf/progs/dummy_st_ops.c new file mode 100644 index 0000000000000..ead87edb75e25 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dummy_st_ops.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. 
Huawei Technologies Co., Ltd */ +#include +#include +#include + +struct bpf_dummy_ops_state { + int val; +} __attribute__((preserve_access_index)); + +struct bpf_dummy_ops { + int (*test_1)(struct bpf_dummy_ops_state *state); + int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4); +}; + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1, struct bpf_dummy_ops_state *state) +{ + int ret; + + if (!state) + return 0xf2f3f4f5; + + ret = state->val; + state->val = 0x5a; + return ret; +} + +__u64 test_2_args[5]; + +SEC("struct_ops/test_2") +int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4) +{ + test_2_args[0] = (unsigned long)state; + test_2_args[1] = a1; + test_2_args[2] = a2; + test_2_args[3] = a3; + test_2_args[4] = a4; + return 0; +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; From 6fdc348006fe2c8f0765f6eecf2e3cbab06c60b5 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 29 Oct 2021 15:49:07 -0700 Subject: [PATCH 172/181] bpf: Bloom filter map naming fixups This patch has two changes in the kernel bloom filter map implementation: 1) Change the names of map-ops functions to include the "bloom_map" prefix. As Martin pointed out on a previous patchset, having generic map-ops names may be confusing in tracing and in perf-report. 2) Drop the "& 0xF" when getting nr_hash_funcs, since we already ascertain that no other bits in map_extra beyond the first 4 bits can be set. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211029224909.1721024-2-joannekoong@fb.com --- kernel/bpf/bloom_filter.c | 49 +++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 7c50232b7571f..073c2f2cab8b4 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -40,7 +40,7 @@ static u32 hash(struct bpf_bloom_filter *bloom, void *value, return h & bloom->bitset_mask; } -static int peek_elem(struct bpf_map *map, void *value) +static int bloom_map_peek_elem(struct bpf_map *map, void *value) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -55,7 +55,7 @@ static int peek_elem(struct bpf_map *map, void *value) return 0; } -static int push_elem(struct bpf_map *map, void *value, u64 flags) +static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -72,12 +72,12 @@ static int push_elem(struct bpf_map *map, void *value, u64 flags) return 0; } -static int pop_elem(struct bpf_map *map, void *value) +static int bloom_map_pop_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } -static struct bpf_map *map_alloc(union bpf_attr *attr) +static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; int numa_node = bpf_map_attr_numa_node(attr); @@ -90,11 +90,13 @@ static struct bpf_map *map_alloc(union bpf_attr *attr) attr->max_entries == 0 || attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags) || + /* The lower 4 bits of map_extra (0xF) specify the number + * of hash functions + */ (attr->map_extra & ~0xF)) return ERR_PTR(-EINVAL); - /* The lower 4 bits of map_extra specify the number 
of hash functions */ - nr_hash_funcs = attr->map_extra & 0xF; + nr_hash_funcs = attr->map_extra; if (nr_hash_funcs == 0) /* Default to using 5 hash functions if unspecified */ nr_hash_funcs = 5; @@ -150,7 +152,7 @@ static struct bpf_map *map_alloc(union bpf_attr *attr) return &bloom->map; } -static void map_free(struct bpf_map *map) +static void bloom_map_free(struct bpf_map *map) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -158,38 +160,39 @@ static void map_free(struct bpf_map *map) bpf_map_area_free(bloom); } -static void *lookup_elem(struct bpf_map *map, void *key) +static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) { /* The eBPF program should use map_peek_elem instead */ return ERR_PTR(-EINVAL); } -static int update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static int bloom_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { /* The eBPF program should use map_push_elem instead */ return -EINVAL; } -static int check_btf(const struct bpf_map *map, const struct btf *btf, - const struct btf_type *key_type, - const struct btf_type *value_type) +static int bloom_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) { /* Bloom filter maps are keyless */ return btf_type_is_void(key_type) ? 0 : -EINVAL; } -static int bpf_bloom_btf_id; +static int bpf_bloom_map_btf_id; const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, - .map_alloc = map_alloc, - .map_free = map_free, - .map_push_elem = push_elem, - .map_peek_elem = peek_elem, - .map_pop_elem = pop_elem, - .map_lookup_elem = lookup_elem, - .map_update_elem = update_elem, - .map_check_btf = check_btf, + .map_alloc = bloom_map_alloc, + .map_free = bloom_map_free, + .map_push_elem = bloom_map_push_elem, + .map_peek_elem = bloom_map_peek_elem, + .map_pop_elem = bloom_map_pop_elem, + .map_lookup_elem = bloom_map_lookup_elem, + .map_update_elem = bloom_map_update_elem, + .map_check_btf = bloom_map_check_btf, .map_btf_name = "bpf_bloom_filter", - .map_btf_id = &bpf_bloom_btf_id, + .map_btf_id = &bpf_bloom_map_btf_id, }; From 8845b4681bf44b9d2d2badf2c67cf476e42a86bd Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 29 Oct 2021 15:49:08 -0700 Subject: [PATCH 173/181] bpf: Add alignment padding for "map_extra" + consolidate holes This patch makes 2 changes regarding alignment padding for the "map_extra" field. 1) In the kernel header, "map_extra" and "btf_value_type_id" are rearranged to consolidate the hole. Before: struct bpf_map { ... u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ u64 map_extra; /* 48 8 */ int spin_lock_off; /* 56 4 */ int timer_off; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 id; /* 64 4 */ int numa_node; /* 68 4 */ ... bool frozen; /* 117 1 */ /* XXX 10 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ ... struct work_struct work; /* 144 72 */ /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */ struct mutex freeze_mutex; /* 216 144 */ /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */ u64 writecnt; /* 360 8 */ /* size: 384, cachelines: 6, members: 26 */ /* sum members: 354, holes: 2, sum holes: 14 */ /* padding: 16 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 10 */ } __attribute__((__aligned__(64))); After: struct bpf_map { ... 
u32 max_entries; /* 36 4 */ u64 map_extra; /* 40 8 */ u32 map_flags; /* 48 4 */ int spin_lock_off; /* 52 4 */ int timer_off; /* 56 4 */ u32 id; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ int numa_node; /* 64 4 */ ... bool frozen /* 113 1 */ /* XXX 14 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ ... struct work_struct work; /* 144 72 */ /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */ struct mutex freeze_mutex; /* 216 144 */ /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */ u64 writecnt; /* 360 8 */ /* size: 384, cachelines: 6, members: 26 */ /* sum members: 354, holes: 1, sum holes: 14 */ /* padding: 16 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 14 */ } __attribute__((__aligned__(64))); 2) Add alignment padding to the bpf_map_info struct More details can be found in commit 36f9814a494a ("bpf: fix uapi hole for 32 bit compat applications") Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211029224909.1721024-3-joannekoong@fb.com --- include/linux/bpf.h | 6 +++--- include/uapi/linux/bpf.h | 1 + tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c098089c1b54d..f6743d4bb531a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -168,23 +168,23 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; - u32 map_flags; u64 map_extra; /* any per-map-type extra fields */ + u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; + u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif char name[BPF_OBJ_NAME_LEN]; - u32 btf_vmlinux_value_type_id; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 22 bytes hole */ + /* 14 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bd0c9f0487f6c..ba5af15e25f5c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5662,6 +5662,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 :32; /* alignment pad */ __u64 map_extra; } __attribute__((aligned(8))); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bd0c9f0487f6c..ba5af15e25f5c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5662,6 +5662,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 :32; /* alignment pad */ __u64 map_extra; } __attribute__((aligned(8))); From 7a67087250f0003acc1b9e20eda01635e1e51f9a Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 29 Oct 2021 15:49:09 -0700 Subject: [PATCH 174/181] selftests/bpf: Add bloom map success test for userspace calls This patch has two changes: 1) Adds a new function "test_success_cases" to test successfully creating + adding + looking up a value in a bloom filter map from the userspace side. 2) Use bpf_create_map instead of bpf_create_map_xattr in the "test_fail_cases" and test_inner_map to make the code look cleaner. 
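As a rough standalone sketch (not part of the patch) of the success path exercised in (1), using the same plain bpf_create_map()-style libbpf calls that (2) switches the tests to; it assumes kernel/uapi headers that already define BPF_MAP_TYPE_BLOOM_FILTER:

    #include <bpf/bpf.h>        /* bpf_create_map, bpf_map_update_elem, ... */
    #include <linux/bpf.h>      /* BPF_MAP_TYPE_BLOOM_FILTER */
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            __u32 value = 42;
            int fd;

            /* Bloom filter maps are keyless, so key_size is 0 */
            fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0);
            if (fd < 0)
                    return 1;

            /* add ("push") a value; the key argument must be NULL */
            if (bpf_map_update_elem(fd, NULL, &value, 0))
                    perror("update");

            /* look up ("peek"); returns 0 when the value is (probably) present */
            if (bpf_map_lookup_elem(fd, NULL, &value))
                    perror("lookup");

            close(fd);
            return 0;
    }
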
Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211029224909.1721024-4-joannekoong@fb.com --- .../bpf/prog_tests/bloom_filter_map.c | 59 +++++++++++-------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index 9aa3fbed918be..be73e3de66680 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -7,44 +7,31 @@ static void test_fail_cases(void) { - struct bpf_create_map_attr xattr = { - .name = "bloom_filter_map", - .map_type = BPF_MAP_TYPE_BLOOM_FILTER, - .max_entries = 100, - .value_size = 11, - }; __u32 value; int fd, err; /* Invalid key size */ - xattr.key_size = 4; - fd = bpf_create_map_xattr(&xattr); + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0); if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) close(fd); - xattr.key_size = 0; /* Invalid value size */ - xattr.value_size = 0; - fd = bpf_create_map_xattr(&xattr); + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0); if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) close(fd); - xattr.value_size = 11; /* Invalid max entries size */ - xattr.max_entries = 0; - fd = bpf_create_map_xattr(&xattr); + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 0, 0); if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) close(fd); - xattr.max_entries = 100; /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ - xattr.map_flags = BPF_F_NO_PREALLOC; - fd = bpf_create_map_xattr(&xattr); + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, + BPF_F_NO_PREALLOC); if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) close(fd); - xattr.map_flags = 0; - fd = bpf_create_map_xattr(&xattr); + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0); if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) return; @@ -67,6 +54,30 @@ static void test_fail_cases(void) close(fd); } +static void test_success_cases(void) +{ + char value[11]; + int fd, err; + + /* Create a map */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, + BPF_F_ZERO_SEED | BPF_F_NUMA_NODE); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case")) + return; + + /* Add a value to the bloom filter */ + err = bpf_map_update_elem(fd, NULL, &value, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem bloom filter success case")) + goto done; + + /* Lookup a value in the bloom filter */ + err = bpf_map_lookup_elem(fd, NULL, &value); + ASSERT_OK(err, "bpf_map_update_elem bloom filter success case"); + +done: + close(fd); +} + static void check_bloom(struct bloom_filter_map *skel) { struct bpf_link *link; @@ -86,16 +97,11 @@ static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals __u32 nr_rand_vals) { int outer_map_fd, inner_map_fd, err, i, key = 0; - struct bpf_create_map_attr xattr = { - .name = "bloom_filter_inner_map", - .map_type = BPF_MAP_TYPE_BLOOM_FILTER, - .value_size = sizeof(__u32), - .max_entries = nr_rand_vals, - }; struct bpf_link *link; /* Create a bloom filter map that will be used as the inner map */ - inner_map_fd = bpf_create_map_xattr(&xattr); + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals), + nr_rand_vals, 0); if (!ASSERT_GE(inner_map_fd, 0, 
"bpf_create_map bloom filter inner map")) return; @@ -190,6 +196,7 @@ void test_bloom_filter_map(void) int err; test_fail_cases(); + test_success_cases(); err = setup_progs(&skel, &rand_vals, &nr_rand_vals); if (err) From ad10c381d133d9f786564bff4b223be1372f6c2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 31 Oct 2021 10:13:53 -0700 Subject: [PATCH 175/181] bpf: Add missing map_delete_elem method to bloom filter map Without it, kernel crashes in map_delete_elem(), as reported by syzbot. BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 72c97067 P4D 72c97067 PUD 1e20c067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 0 PID: 6518 Comm: syz-executor196 Not tainted 5.15.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. RSP: 0018:ffffc90002bafcb8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: 1ffff92000575f9f RCX: 0000000000000000 RDX: 1ffffffff1327aba RSI: 0000000000000000 RDI: ffff888025a30c00 RBP: ffffc90002baff08 R08: 0000000000000000 R09: 0000000000000001 R10: ffffffff818525d8 R11: 0000000000000000 R12: ffffffff8993d560 R13: ffff888025a30c00 R14: ffff888024bc0000 R15: 0000000000000000 FS: 0000555557491300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 0000000070189000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: map_delete_elem kernel/bpf/syscall.c:1220 [inline] __sys_bpf+0x34f1/0x5ee0 kernel/bpf/syscall.c:4606 __do_sys_bpf kernel/bpf/syscall.c:4719 [inline] __se_sys_bpf kernel/bpf/syscall.c:4717 [inline] __x64_sys_bpf+0x75/0xb0 kernel/bpf/syscall.c:4717 do_syscall_x64 arch/x86/entry/common.c:50 [inline] Fixes: 9330986c0300 ("bpf: Add bloom filter map implementation") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211031171353.4092388-1-eric.dumazet@gmail.com --- kernel/bpf/bloom_filter.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 073c2f2cab8b4..277a05e9c9849 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -77,6 +77,11 @@ static int bloom_map_pop_elem(struct bpf_map *map, void *value) return -EOPNOTSUPP; } +static int bloom_map_delete_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; @@ -192,6 +197,7 @@ const struct bpf_map_ops bloom_filter_map_ops = { .map_pop_elem = bloom_map_pop_elem, .map_lookup_elem = bloom_map_lookup_elem, .map_update_elem = bloom_map_update_elem, + .map_delete_elem = bloom_map_delete_elem, .map_check_btf = bloom_map_check_btf, .map_btf_name = "bpf_bloom_filter", .map_btf_id = &bpf_bloom_map_btf_id, From a20eac0af02810669e187cb623bc904908c423af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Nov 2021 16:01:18 -0700 Subject: [PATCH 176/181] selftests/bpf: Fix also no-alu32 strobemeta selftest Previous fix aded bpf_clamp_umax() helper use to re-validate boundaries. While that works correctly, it introduces more branches, which blows up past 1 million instructions in no-alu32 variant of strobemeta selftests. 
Switching len variable from u32 to u64 also fixes the issue and reduces the number of validated instructions, so use that instead. With this fix in place and bpf_clamp_umax() removed, both alu32 and no-alu32 selftests pass. Fixes: 0133c20480b1 ("selftests/bpf: Fix strobemeta selftest regression") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211101230118.1273019-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/strobemeta.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 3687ea755ab5a..60c93aee2f4ad 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -10,14 +10,6 @@ #include #include -#define bpf_clamp_umax(VAR, UMAX) \ - asm volatile ( \ - "if %0 <= %[max] goto +1\n" \ - "%0 = %[max]\n" \ - : "+r"(VAR) \ - : [max]"i"(UMAX) \ - ) - typedef uint32_t pid_t; struct task_struct {}; @@ -366,7 +358,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, void *payload) { void *location; - uint32_t len; + uint64_t len; data->str_lens[idx] = 0; location = calc_location(&cfg->str_locs[idx], tls_base); @@ -398,7 +390,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; void *location; - uint32_t len; + uint64_t len; int i; descr->tag_len = 0; /* presume no tag is set */ @@ -421,7 +413,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { - bpf_clamp_umax(len, STROBE_MAX_STR_LEN); descr->tag_len = len; payload += len; } @@ -439,7 +430,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { - bpf_clamp_umax(len, STROBE_MAX_STR_LEN); descr->key_lens[i] = len; payload += len; } @@ -447,7 +437,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { - bpf_clamp_umax(len, STROBE_MAX_STR_LEN); descr->val_lens[i] = len; payload += len; } From b9979db8340154526d9ab38a1883d6f6ba9b6d47 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Nov 2021 15:21:51 -0700 Subject: [PATCH 177/181] bpf: Fix propagation of bounds from 64-bit min/max into 32-bit and var_off. Before this fix: 166: (b5) if r2 <= 0x1 goto pc+22 from 166 to 189: R2=invP(id=1,umax_value=1,var_off=(0x0; 0xffffffff)) After this fix: 166: (b5) if r2 <= 0x1 goto pc+22 from 166 to 189: R2=invP(id=1,umax_value=1,var_off=(0x0; 0x1)) While processing BPF_JLE the reg_set_min_max() would set true_reg->umax_value = 1 and call __reg_combine_64_into_32(true_reg). Without the fix it would not pass the condition: if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) since umin_value == 0 at this point. Before commit 10bf4e83167c the umin was incorrectly ignored. The commit 10bf4e83167c fixed the correctness issue, but pessimized propagation of 64-bit min max into 32-bit min max and corresponding var_off.
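A self-contained userspace sketch (not kernel code; U32_MIN/U32_MAX are redefined locally) of why the old exclusive comparison rejects the [0, 1] range produced by "if r2 <= 0x1" while the inclusive one accepts it:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define U32_MIN ((uint32_t)0)
    #define U32_MAX ((uint32_t)~0U)

    /* old check: open interval, so umin_value == 0 can never qualify */
    static bool bound_u32_old(uint64_t a) { return a > U32_MIN && a < U32_MAX; }
    /* new check: closed interval, 0 and U32_MAX are valid 32-bit bounds */
    static bool bound_u32_new(uint64_t a) { return a >= U32_MIN && a <= U32_MAX; }

    int main(void)
    {
            uint64_t umin = 0, umax = 1;   /* 64-bit bounds after the branch */

            printf("old: %d\n", bound_u32_old(umin) && bound_u32_old(umax)); /* 0 */
            printf("new: %d\n", bound_u32_new(umin) && bound_u32_new(umax)); /* 1 */
            return 0;
    }

With the old check the 64-bit bounds are not propagated into the 32-bit bounds, so var_off stays at (0x0; 0xffffffff); with the new check they are, giving (0x0; 0x1) as shown above.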
Fixes: 10bf4e83167c ("bpf: Fix propagation of 32 bit unsigned bounds from 64 bit bounds") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211101222153.78759-1-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 2 +- tools/testing/selftests/bpf/verifier/array_access.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3c8aa7df17734..29671ed49ee80 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1425,7 +1425,7 @@ static bool __reg64_bound_s32(s64 a) static bool __reg64_bound_u32(u64 a) { - return a > U32_MIN && a < U32_MAX; + return a >= U32_MIN && a <= U32_MAX; } static void __reg_combine_64_into_32(struct bpf_reg_state *reg) diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1b1c798e92489..1b138cd2b187d 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -186,7 +186,7 @@ }, .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", + .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, From 388e2c0b978339dee9b0a81a2e546f8979e021e2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Nov 2021 15:21:52 -0700 Subject: [PATCH 178/181] bpf: Fix propagation of signed bounds from 64-bit min/max into 32-bit. Similar to unsigned bounds propagation fix signed bounds. The 'Fixes' tag is a hint. There is no security bug here. The verifier was too conservative. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211101222153.78759-2-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 29671ed49ee80..a4b48bd4e3ca4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1420,7 +1420,7 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - return a > S32_MIN && a < S32_MAX; + return a >= S32_MIN && a <= S32_MAX; } static bool __reg64_bound_u32(u64 a) From 0869e5078afbd1b22c20832b391b2d85291bc0a8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Nov 2021 15:21:53 -0700 Subject: [PATCH 179/181] selftests/bpf: Add a testcase for 64-bit bounds propagation issue. 
./test_progs-no_alu32 -vv -t twfw Before the 64-bit_into_32-bit fix: 19: (25) if r1 > 0x3f goto pc+6 R1_w=inv(id=0,umax_value=63,var_off=(0x0; 0xff),s32_max_value=255,u32_max_value=255) and eventually: invalid access to map value, value_size=8 off=7 size=8 R6 max value is outside of the allowed memory range libbpf: failed to load object 'no_alu32/twfw.o' After the fix: 19: (25) if r1 > 0x3f goto pc+6 R1_w=inv(id=0,umax_value=63,var_off=(0x0; 0x3f)) verif_twfw:OK Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211101222153.78759-3-alexei.starovoitov@gmail.com --- .../bpf/prog_tests/bpf_verif_scale.c | 5 ++ tools/testing/selftests/bpf/progs/twfw.c | 58 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/twfw.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 867349e4ed9e3..27f5d8ea7964d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -202,3 +202,8 @@ void test_verif_scale_seg6_loop() { scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false); } + +void test_verif_twfw() +{ + scale_test("twfw.o", BPF_PROG_TYPE_CGROUP_SKB, false); +} diff --git a/tools/testing/selftests/bpf/progs/twfw.c b/tools/testing/selftests/bpf/progs/twfw.c new file mode 100644 index 0000000000000..de1b18a62b467 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/twfw.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include +#include +#include +#include + +#define TWFW_MAX_TIERS (64) +/* + * load is successful + * #define TWFW_MAX_TIERS (64u)$ + */ + +struct twfw_tier_value { + unsigned long mask[1]; +}; + +struct rule { + uint8_t seqnum; +}; + +struct rules_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, struct rule); + __uint(max_entries, 1); +}; + +struct tiers_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, struct twfw_tier_value); + __uint(max_entries, 1); +}; + +struct rules_map rules SEC(".maps"); +struct tiers_map tiers SEC(".maps"); + +SEC("cgroup_skb/ingress") +int twfw_verifier(struct __sk_buff* skb) +{ + const uint32_t key = 0; + const struct twfw_tier_value* tier = bpf_map_lookup_elem(&tiers, &key); + if (!tier) + return 1; + + struct rule* rule = bpf_map_lookup_elem(&rules, &key); + if (!rule) + return 1; + + if (rule && rule->seqnum < TWFW_MAX_TIERS) { + /* rule->seqnum / 64 should always be 0 */ + unsigned long mask = tier->mask[rule->seqnum / 64]; + if (mask) + return 0; + } + return 1; +} From 9741e07ece7c247dd65e1aa01e16b683f01c05a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 29 Oct 2021 14:57:29 +0200 Subject: [PATCH 180/181] kbuild: Unify options for BTF generation for vmlinux and modules Using new PAHOLE_FLAGS variable to pass extra arguments to pahole for both vmlinux and modules BTF data generation. Adding new scripts/pahole-flags.sh script that detect and prints pahole options. 
[ fixed issues found by kernel test robot ] Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211029125729.70002-1-jolsa@kernel.org --- Makefile | 3 +++ scripts/Makefile.modfinal | 2 +- scripts/link-vmlinux.sh | 11 +---------- scripts/pahole-flags.sh | 20 ++++++++++++++++++++ 4 files changed, 25 insertions(+), 11 deletions(-) create mode 100755 scripts/pahole-flags.sh diff --git a/Makefile b/Makefile index 437ccc66a1c28..8f24bceec62d7 100644 --- a/Makefile +++ b/Makefile @@ -480,6 +480,8 @@ LZ4 = lz4c XZ = xz ZSTD = zstd +PAHOLE_FLAGS = $(shell PAHOLE=$(PAHOLE) $(srctree)/scripts/pahole-flags.sh) + CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := @@ -534,6 +536,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export PAHOLE_FLAGS # Files to ignore in find ... statements diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 1fb45b011e4b6..7f39599e9faed 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -40,7 +40,7 @@ quiet_cmd_ld_ko_o = LD [M] $@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ if [ -f vmlinux ]; then \ - LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \ + LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \ $(RESOLVE_BTFIDS) -b vmlinux $@; \ else \ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index d74cee5c4326a..3ea7cece7c97d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -205,7 +205,6 @@ vmlinux_link() gen_btf() { local pahole_ver - local extra_paholeopt= if ! [ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -220,16 +219,8 @@ gen_btf() vmlinux_link ${1} - if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then - # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars - extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" - fi - if [ "${pahole_ver}" -ge "121" ]; then - extra_paholeopt="${extra_paholeopt} --btf_gen_floats" - fi - info "BTF" ${2} - LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1} + LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh new file mode 100755 index 0000000000000..2b99fc77019ce --- /dev/null +++ b/scripts/pahole-flags.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +extra_paholeopt= + +if ! 
[ -x "$(command -v ${PAHOLE})" ]; then + return +fi + +pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') + +if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then + # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars + extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" +fi +if [ "${pahole_ver}" -ge "121" ]; then + extra_paholeopt="${extra_paholeopt} --btf_gen_floats" +fi + +echo ${extra_paholeopt} From 0b170456e0dda92b8925d40e217461fcc4e1efc9 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 29 Oct 2021 11:01:11 +0200 Subject: [PATCH 181/181] libbpf: Deprecate AF_XDP support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deprecate AF_XDP support in libbpf ([0]). This has been moved to libxdp as it is a better fit for that library. The AF_XDP support only uses the public libbpf functions and can therefore just use libbpf as a library from libxdp. The libxdp APIs are exactly the same so it should just be linking with libxdp instead of libbpf for the AF_XDP functionality. If not, please submit a bug report. Linking with both libraries is supported but make sure you link in the correct order so that the new functions in libxdp are used instead of the deprecated ones in libbpf. Libxdp can be found at https://github.com/xdp-project/xdp-tools. [0] Closes: https://github.com/libbpf/libbpf/issues/270 Signed-off-by: Magnus Karlsson Signed-off-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20211029090111.4733-1-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.h | 90 ++++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 01c12dca9c100..64e9c57fd792d 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -23,6 +23,12 @@ extern "C" { #endif +/* This whole API has been deprecated and moved to libxdp that can be found at + * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so + * it should just be linking with libxdp instead of libbpf for this set of + * functionality. If not, please submit a bug report on the aforementioned page. + */ + /* Load-Acquire Store-Release barriers used by the XDP socket * library. The following macros should *NOT* be considered part of * the xsk.h API, and is subject to change anytime. 
@@ -245,8 +251,10 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); } -LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 @@ -263,10 +271,10 @@ struct xsk_umem_config { __u32 flags; }; -LIBBPF_API int xsk_setup_xdp_prog(int ifindex, - int *xsks_map_fd); -LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, - int xsks_map_fd); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); /* Flags for the libbpf_flags field. */ #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) @@ -280,40 +288,46 @@ struct xsk_socket_config { }; /* Set config to NULL to get the default configuration. */ -LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); -LIBBPF_API int -xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create_v0_0_2(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create_v0_0_4(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod 
*tx, + const struct xsk_socket_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. */ -LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +void xsk_socket__delete(struct xsk_socket *xsk); #ifdef __cplusplus } /* extern "C" */