From 68493d88cfb32811feac0b2be942fb5304571d45 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 28 Oct 2024 08:56:30 -0700 Subject: [PATCH 01/49] ionic: clean interrupt before enabling queue to avoid credit race jira VULN-5563 pre cve CVE-2024-39502 commit-author Neel Patel commit e8797a058466b60fc5a3291b92430c93ba90eaff upstream-diff some fuzz around placement of the napi_enable() call - the fix for CVE-2024-39502 will place it correctly. Clear the interrupt credits before enabling the queue rather than after to be sure that the enabled queue starts at 0 and that we don't wipe away possible credits after enabling the queue. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Neel Patel Signed-off-by: Shannon Nelson Reviewed-by: Leon Romanovsky Signed-off-by: Jakub Kicinski (cherry picked from commit e8797a058466b60fc5a3291b92430c93ba90eaff) Signed-off-by: Greg Rose --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ab27ddf10e..49ceb07938 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -267,6 +267,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) .oper = IONIC_Q_ENABLE, }, }; + int ret; idev = &lif->ionic->idev; dev = lif->ionic->dev; @@ -274,6 +275,16 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n", ctx.cmd.q_control.index, ctx.cmd.q_control.type); + if (qcq->flags & IONIC_QCQ_F_INTR) + ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); + + ret = ionic_adminq_post_wait(lif, &ctx); + if (ret) + return ret; + + if (qcq->napi.poll) + napi_enable(&qcq->napi); + if (qcq->flags & IONIC_QCQ_F_INTR) { irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); @@ -283,7 +294,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) IONIC_INTR_MASK_CLEAR); } - return ionic_adminq_post_wait(lif, &ctx); + return 0; } static int ionic_qcq_disable(struct ionic_lif *lif, struct ionic_qcq *qcq, int fw_err) From 70b1b0a3101e6d0c93b8bc750f2a773d05741bf6 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 7 Oct 2024 12:07:28 -0700 Subject: [PATCH 02/49] ionic: fix use after netif_napi_del() jira VULN-5563 cve CVE-2024-39502 commit-author Taehee Yoo commit 79f18a41dd056115d685f3b0a419c7cd40055e13 upstream-diff There's other modifications in and around this code in the upstream patch. Also, the napi_enable from the patch gets placed on the wrong line. Remove the extra cruft and move the napi_enable() to where it should be according to the commit. When queues are started, netif_napi_add() and napi_enable() are called. If there are 4 queues and only 3 queues are used for the current configuration, only 3 queues' napi should be registered and enabled. The ionic_qcq_enable() checks whether the .poll pointer is not NULL for enabling only the using queue' napi. Unused queues' napi will not be registered by netif_napi_add(), so the .poll pointer indicates NULL. But it couldn't distinguish whether the napi was unregistered or not because netif_napi_del() doesn't reset the .poll pointer to NULL. So, ionic_qcq_enable() calls napi_enable() for the queue, which was unregistered by netif_napi_del(). Reproducer: ethtool -L rx 1 tx 1 combined 0 ethtool -L rx 0 tx 0 combined 1 ethtool -L rx 0 tx 0 combined 4 Splat looks like: kernel BUG at net/core/dev.c:6666! Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1057 Comm: kworker/3:3 Not tainted 6.10.0-rc2+ #16 Workqueue: events ionic_lif_deferred_work [ionic] RIP: 0010:napi_enable+0x3b/0x40 Code: 48 89 c2 48 83 e2 f6 80 b9 61 09 00 00 00 74 0d 48 83 bf 60 01 00 00 00 74 03 80 ce 01 f0 4f RSP: 0018:ffffb6ed83227d48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff97560cda0828 RCX: 0000000000000029 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff97560cda0a28 RBP: ffffb6ed83227d50 R08: 0000000000000400 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: ffff97560ce3c1a0 R14: 0000000000000000 R15: ffff975613ba0a20 FS: 0000000000000000(0000) GS:ffff975d5f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f734ee200 CR3: 0000000103e50000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? die+0x33/0x90 ? do_trap+0xd9/0x100 ? napi_enable+0x3b/0x40 ? do_error_trap+0x83/0xb0 ? napi_enable+0x3b/0x40 ? napi_enable+0x3b/0x40 ? exc_invalid_op+0x4e/0x70 ? napi_enable+0x3b/0x40 ? asm_exc_invalid_op+0x16/0x20 ? napi_enable+0x3b/0x40 ionic_qcq_enable+0xb7/0x180 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_start_queues+0xc4/0x290 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_link_status_check+0x11c/0x170 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_lif_deferred_work+0x129/0x280 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] process_one_work+0x145/0x360 worker_thread+0x2bb/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Taehee Yoo Reviewed-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240612060446.1754392-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski (cherry picked from commit 79f18a41dd056115d685f3b0a419c7cd40055e13) Signed-off-by: Greg Rose Conflicts: drivers/net/ethernet/pensando/ionic/ionic_lif.c --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 49ceb07938..0899de5dce 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -286,9 +286,9 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) napi_enable(&qcq->napi); if (qcq->flags & IONIC_QCQ_F_INTR) { + napi_enable(&qcq->napi); irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); - napi_enable(&qcq->napi); ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_CLEAR); From 8a1d1bd65df4b5201caf317c488b0fc2cbd58463 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 17 Oct 2024 16:19:33 -0700 Subject: [PATCH 03/49] netfilter: nftables: add catch-all set element support jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit aaa31047a6d25da0fa101da1ed544e1247949b40 upstream-diff We only take one small piece of code from this patch. The netfilters folks made a big NO NO by making a commit with a new feature as well as adding some additional safety checks. Red Hat took the additional safety checks but without any of the rest of this rather large upstream patch but this commit has the necessary bits for backporting the remaining netfilter bits for this VULN ticket. This patch extends the set infrastructure to add a special catch-all set element. If the lookup fails to find an element (or range) in the set, then the catch-all element is selected. Users can specify a mapping, expression(s) and timeout to be attached to the catch-all element. This patch adds a catchall list to the set, this list might contain more than one single catch-all element (e.g. in case that the catch-all element is removed and a new one is added in the same transaction). However, most of the time, there will be either one element or no elements at all in this list. The catch-all element is identified via NFT_SET_ELEM_CATCHALL flag and such special element has no NFTA_SET_ELEM_KEY attribute. There is a new nft_set_elem_catchall object that stores a reference to the dummy catch-all element (catchall->elem) whose layout is the same of the set element type to reuse the existing set element codebase. The set size does not apply to the catch-all element, users can define a catch-all element even if the set is full. The check for valid set element flags hava been updates to report EOPNOTSUPP in case userspace requests flags that are not supported when using new userspace nftables and old kernel. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fea6a2d167..3dd7ae1612 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4630,7 +4630,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) && + nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; @@ -5224,6 +5225,13 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx, return -ENOMEM; } +static void nft_setelem_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + set->ops->remove(net, set, elem); +} + static bool nft_setelem_valid_key_end(const struct nft_set *set, struct nlattr **nla, u32 flags) { @@ -5577,7 +5585,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return 0; err_set_full: - set->ops->remove(ctx->net, set, &elem); + nft_setelem_remove(ctx->net, set, &elem); err_element_clash: kfree(trans); err_elem_expr: From 1f7885fffb1c88a83b61c940b4085b2d30f95edb Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Sun, 27 Oct 2024 10:23:51 -0700 Subject: [PATCH 04/49] netfilter: nf_tables: Remove spurious code jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 Remove spurious code that does not exist in 4.18.0-534 but is still hanging around. Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3dd7ae1612..82de162db7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5535,10 +5535,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { - *nft_set_ext_obj(ext) = obj; - obj->use++; - } err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs); if (err < 0) goto err_elem_expr; From fab4dfd73ecb756b11d85c206b6b94a527e3907f Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 18 Oct 2024 14:38:08 -0700 Subject: [PATCH 05/49] netfilter: nf_tables: honor NLM_F_CREATE and NLM_F_EXCL in event notification jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 6fb721cf781808ee2ca5e737fb0592cc68de3381 upstream-diff The catchall features from previous commits were not actually backported by Red Hat, so this patch has to work around that. Include the NLM_F_CREATE and NLM_F_EXCL flags in netlink event notifications, otherwise userspace cannot distiguish between create and add commands. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 6fb721cf781808ee2ca5e737fb0592cc68de3381) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 4 +-- net/netfilter/nf_tables_api.c | 49 +++++++++++++++++++++++-------- net/netfilter/nft_quota.c | 2 +- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index afd104d4fe..3d462d2872 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1167,8 +1167,8 @@ struct nft_object *nft_obj_lookup(const struct net *net, u8 genmask); void nft_obj_notify(struct net *net, const struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); + struct nft_object *obj, u32 portid, u32 seq, int event, + u16 flags, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 82de162db7..cb271d49c9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -760,6 +760,7 @@ static void nft_notify_enqueue(struct sk_buff *skb, bool report, static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -770,8 +771,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table); + event, flags, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -1493,6 +1497,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -1503,8 +1508,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table, + event, flags, ctx->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -2767,6 +2775,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -2777,6 +2786,9 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, 0, ctx->family, ctx->table, ctx->chain, rule, NULL); @@ -3810,8 +3822,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { - struct sk_buff *skb; u32 portid = ctx->portid; + struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -3822,7 +3835,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + + err = nf_tables_fill_set(skb, ctx, set, event, flags); if (err < 0) { kfree_skb(skb); goto err; @@ -5011,11 +5027,12 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, - int event, u16 flags) + int event) { struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -5025,6 +5042,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, set, elem); if (err < 0) { @@ -6492,7 +6512,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) + u16 flags, int family, int report, gfp_t gfp) { struct sk_buff *skb; int err; @@ -6516,8 +6536,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, if (skb == NULL) goto err; - err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, - table, obj, false); + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, + flags & (NLM_F_CREATE | NLM_F_EXCL), + family, table, obj, false); if (err < 0) { kfree_skb(skb); goto err; @@ -6534,7 +6555,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->family, ctx->report, GFP_KERNEL); + ctx->flags, ctx->family, ctx->report, GFP_KERNEL); } /* @@ -7148,6 +7169,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -7158,8 +7180,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, - ctx->seq, event, 0, + ctx->seq, event, flags, ctx->family, flowtable); if (err < 0) { kfree_skb(skb); @@ -7999,7 +8024,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te->set->ops->activate(net, te->set, &te->elem); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_NEWSETELEM, 0); + NFT_MSG_NEWSETELEM); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8007,7 +8032,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_DELSETELEM, 0); + NFT_MSG_DELSETELEM); te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index e0c6e843cb..cf5cf49681 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -63,7 +63,7 @@ static void nft_quota_obj_eval(struct nft_object *obj, if (overquota && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, - NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); + NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); } static int nft_quota_do_init(const struct nlattr * const tb[], From 8ec9ab0e52bf3cb8bc5f0a90cc18c1fb8f7a62cd Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 09:40:55 -0700 Subject: [PATCH 06/49] netfilter: nf_tables: integrate pipapo into commit protocol jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 212ed75dc5fb9d1423b3942c8f872a868cda3466 upstream-diff The offsets for the code are too divergent for the upstream patch to apply cleanly, but except for that fuzz I believe it is correct. The pipapo set backend follows copy-on-update approach, maintaining one clone of the existing datastructure that is being updated. The clone and current datastructures are swapped via rcu from the commit step. The existing integration with the commit protocol is flawed because there is no operation to clean up the clone if the transaction is aborted. Moreover, the datastructure swap happens on set element activation. This patch adds two new operations for sets: commit and abort, these new operations are invoked from the commit and abort steps, after the transactions have been digested, and it updates the pipapo set backend to use it. This patch adds a new ->pending_update field to sets to maintain a list of sets that require this new commit and abort operations. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 212ed75dc5fb9d1423b3942c8f872a868cda3466) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 3 ++ net/netfilter/nf_tables_api.c | 56 +++++++++++++++++++++++++++++++ net/netfilter/nft_set_pipapo.c | 53 ++++++++++++++++++++--------- 3 files changed, 97 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d462d2872..a34c506de0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -388,6 +388,8 @@ struct nft_set_ops { const struct nft_set_elem *elem, unsigned int flags); + void (*commit)(const struct nft_set *set); + void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, @@ -489,6 +491,7 @@ struct nft_set { u16 policy; u16 udlen; unsigned char *udata; + struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags:14, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cb271d49c9..17fb43e246 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4311,6 +4311,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) @@ -7878,9 +7879,24 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; @@ -8025,6 +8041,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8036,6 +8057,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -8074,6 +8100,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, net->nft.base_seq); @@ -8126,9 +8154,24 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; if (action == NFNL_ABORT_VALIDATE && @@ -8211,6 +8254,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) te = (struct nft_trans_elem *)trans->data; te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; @@ -8219,6 +8268,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -8249,6 +8303,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 60cc5e253f..9ae3743c07 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1495,17 +1495,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m) } } -/** - * pipapo_reclaim_match - RCU callback to free fields from old matching data - * @rcu: RCU head - */ -static void pipapo_reclaim_match(struct rcu_head *rcu) +static void pipapo_free_match(struct nft_pipapo_match *m) { - struct nft_pipapo_match *m; int i; - m = container_of(rcu, struct nft_pipapo_match, rcu); - for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); @@ -1517,7 +1510,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) } /** - * pipapo_commit() - Replace lookup data with current working copy + * pipapo_reclaim_match - RCU callback to free fields from old matching data + * @rcu: RCU head + */ +static void pipapo_reclaim_match(struct rcu_head *rcu) +{ + struct nft_pipapo_match *m; + + m = container_of(rcu, struct nft_pipapo_match, rcu); + pipapo_free_match(m); +} + +/** + * nft_pipapo_commit() - Replace lookup data with current working copy * @set: nftables API set representation * * While at it, check if we should perform garbage collection on the working @@ -1527,7 +1532,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1552,6 +1557,26 @@ static void pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static void nft_pipapo_abort(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *new_clone, *m; + + if (!priv->dirty) + return; + + m = rcu_dereference(priv->match); + + new_clone = pipapo_clone(m); + if (IS_ERR(new_clone)) + return; + + priv->dirty = false; + + pipapo_free_match(priv->clone); + priv->clone = new_clone; +} + /** * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace @@ -1559,8 +1584,7 @@ static void pipapo_commit(const struct nft_set *set) * @elem: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently - * in use for lookups, and not directly inserted into current lookup data, so - * we'll take care of that by calling pipapo_commit() here. Both + * in use for lookups, and not directly inserted into current lookup data. Both * nft_pipapo_insert() and nft_pipapo_activate() are called once for each * element, hence we can't purpose either one as a real commit operation. */ @@ -1576,8 +1600,6 @@ static void nft_pipapo_activate(const struct net *net, nft_set_elem_change_active(net, set, &e->ext); nft_set_elem_clear_busy(&e->ext); - - pipapo_commit(set); } /** @@ -1823,7 +1845,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, if (i == m->field_count) { priv->dirty = true; pipapo_drop(m, rulemap); - pipapo_commit(set); return; } @@ -2132,6 +2153,8 @@ struct nft_set_type nft_set_pipapo_type __read_mostly = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; From e147d6a987c00d22de00fbf9145a76aec75afcb3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 10:43:19 -0700 Subject: [PATCH 07/49] netfilter: nf_tables: disallow element updates of bound anonymous sets jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit c88c535b592d3baeee74009f3eceeeaf0fdd5e1b Anonymous sets come with NFT_SET_CONSTANT from userspace. Although API allows to create anonymous sets without NFT_SET_CONSTANT, it makes no sense to allow to add and to delete elements for bound anonymous sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit c88c535b592d3baeee74009f3eceeeaf0fdd5e1b) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 17fb43e246..ba549cf762 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5648,7 +5648,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -5853,7 +5854,9 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { From d42d3d6d90220c5989f88bdfd2807aa124ac1757 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 12:00:03 -0700 Subject: [PATCH 08/49] netfilter: nf_tables: disallow updates of anonymous sets jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit b770283c98e0eee9133c47bc03b6cc625dc94723 Disallow updates of set timeout and garbage collection parameters for anonymous sets. Fixes: 123b99619cca ("netfilter: nf_tables: honor set timeout and garbage collection updates") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit b770283c98e0eee9133c47bc03b6cc625dc94723) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ba549cf762..6a183e0021 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4207,6 +4207,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + return 0; } From 0891f13039b6995ef9bb67ff6ae9918b61b4dfec Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 14:04:44 -0700 Subject: [PATCH 09/49] netfilter: nf_tables: disallow timeout for anonymous sets jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit e26d3009efda338f19016df4175f354a9bd0a4ab Never used from userspace, disallow these parameters. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit e26d3009efda338f19016df4175f354a9bd0a4ab) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6a183e0021..f9977814bb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4156,6 +4156,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); if (err) return err; @@ -4164,6 +4167,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -5348,6 +5355,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT], &timeout); if (err) From b3187691ad711a82cdc5ba83619d08cfce55e25a Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 14:41:41 -0700 Subject: [PATCH 10/49] netfilter: nf_tables: add nft_chain_add() jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 04b7db414490ea9254d0c1d8930ea9571f8ce9f0 upstream-diff same as most in this series, patch looks identical but has offset fuzz. This patch adds a helper function to add the chain to the hashtable and the chain list. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 04b7db414490ea9254d0c1d8930ea9571f8ce9f0) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f9977814bb..dbd910c5c3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1986,6 +1986,20 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } +static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +{ + int err; + + err = rhltable_insert_key(&table->chains_ht, chain->name, + &chain->rhlhead, nft_chain_ht_params); + if (err) + return err; + + list_add_tail_rcu(&chain->list, &table->chains); + + return 0; +} + static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, u8 policy, u32 flags) { @@ -2065,16 +2079,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; - err = rhltable_insert_key(&table->chains_ht, chain->name, - &chain->rhlhead, nft_chain_ht_params); - if (err) - goto err2; - trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - rhltable_remove(&table->chains_ht, &chain->rhlhead, - nft_chain_ht_params); goto err2; } @@ -2082,9 +2089,13 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nft_is_base_chain(chain)) nft_trans_chain_policy(trans) = policy; + err = nft_chain_add(table, chain); + if (err < 0) { + nft_trans_destroy(trans); + goto err2; + } table->use++; - list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: From 76baffab41c7290e517dc16e10233a2bded5ae4e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 09:34:30 -0700 Subject: [PATCH 11/49] netfilter: nf_tables: report use refcount overflow jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 1689f25924ada8fe14a4a82c38925d04994c7142 upstream-diff This cherry pick is a complete mess and I tried to follow the 5.18.0-534 code as the guiding light, but the upstream diff is a large. Overflow use refcount checks are not complete. Add helper function to deal with object reference counter tracking. Report -EMFILE in case UINT_MAX is reached. nft_use_dec() splats in case that reference counter underflows, which should not ever happen. Add nft_use_inc_restore() and nft_use_dec_restore() which are used to restore reference counter from error and abort paths. Use u32 in nft_flowtable and nft_object since helper functions cannot work on bitfields. Remove the few early incomplete checks now that the helper functions are in place and used to check for refcount overflow. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 1689f25924ada8fe14a4a82c38925d04994c7142) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 31 ++++- net/netfilter/nf_tables_api.c | 189 ++++++++++++++++++------------ net/netfilter/nft_flow_offload.c | 6 +- net/netfilter/nft_objref.c | 8 +- 4 files changed, 151 insertions(+), 83 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a34c506de0..2865638e0a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1079,6 +1079,29 @@ int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); +static inline bool nft_use_inc(u32 *use) +{ + if (*use == UINT_MAX) + return false; + + (*use)++; + + return true; +} + +static inline void nft_use_dec(u32 *use) +{ + WARN_ON_ONCE((*use)-- == 0); +} + +/* For error and abort path: restore use counter to previous state. */ +static inline void nft_use_inc_restore(u32 *use) +{ + WARN_ON_ONCE(!nft_use_inc(use)); +} + +#define nft_use_dec_restore nft_use_dec + /** * struct nft_table - nf_tables table * @@ -1148,8 +1171,8 @@ struct nft_object { struct list_head list; struct rhlist_head rhlhead; struct nft_object_hash_key key; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; @@ -1249,8 +1272,8 @@ struct nft_flowtable { char *name; int hooknum; int ops_len; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; /* runtime data below here */ struct list_head hook_list ____cacheline_aligned; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dbd910c5c3..a10c3d593d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -349,7 +349,7 @@ static int nft_delchain(struct nft_ctx *ctx) if (IS_ERR(trans)) return PTR_ERR(trans); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return 0; @@ -390,7 +390,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; @@ -490,7 +490,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) return err; nft_deactivate_next(ctx->net, set); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -522,7 +522,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; nft_deactivate_next(ctx->net, obj); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -556,7 +556,7 @@ static int nft_delflowtable(struct nft_ctx *ctx, return err; nft_deactivate_next(ctx->net, flowtable); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -2012,9 +2012,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_rule **rules; int err; - if (table->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_CHAIN_HOOK]) { struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; @@ -2079,6 +2076,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err_use; + } + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2095,10 +2097,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err2; } - table->use++; - return 0; err2: + nft_use_dec_restore(&table->use); +err_use: nf_tables_unregister_hook(net, table, chain); err1: nf_tables_chain_destroy(ctx); @@ -3158,9 +3160,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EINVAL; handle = nf_tables_alloc_handle(table); - if (chain->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); old_rule = __nft_rule_lookup(chain, pos_handle); @@ -3250,6 +3249,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } + if (!nft_use_inc(&chain->use)) { + err = -EMFILE; + goto err_release_rule; + } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { @@ -3283,7 +3287,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } kvfree(info); - chain->use++; if (flow) nft_trans_flow_rule(trans) = flow; @@ -3293,6 +3296,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err_destroy_flow_rule: + nft_use_dec_restore(&chain->use); if (flow) nft_flow_rule_destroy(flow); err_release_rule: @@ -4246,10 +4250,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (ops->privsize != NULL) size = ops->privsize(nla, &desc); + if (!nft_use_inc(&table->use)) + return -EMFILE; + set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (!set) { err = -ENOMEM; - goto err1; + goto err_alloc; } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); @@ -4339,7 +4346,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); - table->use++; + return 0; err_set_expr_alloc: @@ -4351,8 +4358,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, kfree(set->name); err_set_name: kvfree(set); -err1: +err_alloc: module_put(to_set_type(ops)->owner); + nft_use_dec_restore(&table->use); + return err; } @@ -4436,9 +4445,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (set->use == UINT_MAX) - return -EOVERFLOW; - if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -4463,10 +4469,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, return iter.err; } bind: + if (!nft_use_inc(&set->use)) + return -EMFILE; + binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); - set->use++; return 0; } @@ -4491,7 +4499,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) if (nft_set_is_anonymous(set)) nft_clear(ctx->net, set); - set->use++; + nft_use_inc_restore(&set->use); } EXPORT_SYMBOL_GPL(nf_tables_activate_set); @@ -4507,18 +4515,17 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, else list_del_rcu(&binding->list); - set->use--; + nft_use_dec(&set->use); break; case NFT_TRANS_PREPARE: if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); - - set->use--; + nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - set->use--; - /* fall through */ + nft_use_dec(&set->use); + fallthrough; default: nf_tables_unbind_set(ctx, set, binding, phase == NFT_TRANS_COMMIT); @@ -5189,7 +5196,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -5496,8 +5503,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); + obj = NULL; + goto err_parse_key_end; + } + + if (!nft_use_inc(&obj->use)) { + err = -EMFILE; + obj = NULL; goto err_parse_key_end; } + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); if (err < 0) goto err_parse_key_end; @@ -5576,6 +5591,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; + + if (obj) + *nft_set_ext_obj(ext) = obj; + if (ulen > 0) { udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; @@ -5631,14 +5650,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err_element_clash: kfree(trans); err_elem_expr: - if (obj) - obj->use--; - nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&data, desc.type); err_parse_key_end: + if (obj) + nft_use_dec_restore(&obj->use); + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); @@ -5702,11 +5721,14 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, */ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { + struct nft_chain *chain; + if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use++; + chain = data->verdict.chain; + nft_use_inc_restore(&chain->use); break; } } @@ -5721,7 +5743,7 @@ static void nft_set_elem_activate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use++; + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } static void nft_set_elem_deactivate(const struct net *net, @@ -5733,7 +5755,7 @@ static void nft_set_elem_deactivate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, @@ -6212,9 +6234,14 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + type = nft_obj_type_get(net, objtype); - if (IS_ERR(type)) - return PTR_ERR(type); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err_type; + } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { @@ -6240,7 +6267,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err4; list_add_tail_rcu(&obj->list, &table->objects); - table->use++; + return 0; err4: /* queued in transaction log */ @@ -6254,6 +6281,9 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, kfree(obj); err1: module_put(type->owner); +err_type: + nft_use_dec_restore(&table->use); + return err; } @@ -6639,8 +6669,8 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - flowtable->use--; - /* fall through */ + nft_use_dec(&flowtable->use); + fallthrough; default: return; } @@ -6871,9 +6901,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); - if (!flowtable) - return -ENOMEM; + if (!flowtable) { + err = -ENOMEM; + goto flowtable_alloc; + } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); @@ -6925,7 +6960,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err5; list_add_tail_rcu(&flowtable->list, &table->flowtables); - table->use++; return 0; err5: @@ -6942,6 +6976,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, kfree(flowtable->name); err1: kfree(flowtable); +flowtable_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -7213,7 +7250,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, - ctx->seq, event, flags, + ctx->seq, event, 0, ctx->family, flowtable); if (err < 0) { kfree_skb(skb); @@ -8051,7 +8088,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + nft_use_dec(&trans->ctx.table->use); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); @@ -8231,7 +8268,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -8239,12 +8276,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: - trans->ctx.chain->use--; + nft_use_dec_restore(&trans->ctx.chain->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), @@ -8253,7 +8290,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: - trans->ctx.chain->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) @@ -8262,7 +8299,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -8270,7 +8307,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -8308,23 +8345,23 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, - nft_trans_flowtable(trans)); - break; + nft_trans_flowtable(trans)); case NFT_MSG_DELFLOWTABLE: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); + list_del_rcu(&nft_trans_flowtable(trans)->list); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); nft_trans_destroy(trans); break; @@ -8716,8 +8753,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (!nft_use_inc(&chain->use)) + return -EMFILE; - chain->use++; data->verdict.chain = chain; break; default: @@ -8731,10 +8769,13 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { + struct nft_chain *chain; + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use--; + chain = data->verdict.chain; + nft_use_dec(&chain->use); break; } } @@ -8888,11 +8929,11 @@ int __nft_release_basechain(struct nft_ctx *ctx) nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } nft_chain_del(ctx->chain); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nf_tables_chain_destroy(ctx); return 0; @@ -8933,35 +8974,35 @@ static void __nft_release_tables(struct net *net) }; list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - ctx.family = table->family; - ctx.table = table; - list_for_each_entry(chain, &table->chains, list) { - ctx.chain = chain; - list_for_each_entry_safe(rule, nr, &chain->rules, list) { - list_del(&rule->list); - chain->use--; - nf_tables_rule_release(&ctx, rule); - } - } + ctx.family = table->family; + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + nft_use_dec(&chain->use); + nf_tables_rule_release(&ctx, rule); + } + } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { list_del(&flowtable->list); - table->use--; + nft_use_dec(&table->use); nf_tables_flowtable_destroy(flowtable); } list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); - table->use--; + nft_use_dec(&table->use); nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { nft_obj_del(obj); - table->use--; + nft_use_dec(&table->use); nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { ctx.chain = chain; nft_chain_del(chain); - table->use--; + nft_use_dec(&table->use); nf_tables_chain_destroy(&ctx); } list_del(&table->list); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 5aac5c994c..66619fd61a 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -169,8 +169,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, if (IS_ERR(flowtable)) return PTR_ERR(flowtable); + if (!nft_use_inc(&flowtable->use)) + return -EMFILE; + priv->flowtable = flowtable; - flowtable->use++; return nf_ct_netns_get(ctx->net, ctx->family); } @@ -189,7 +191,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx, { struct nft_flow_offload *priv = nft_expr_priv(expr); - priv->flowtable->use++; + nft_use_inc_restore(&priv->flowtable->use); } static void nft_flow_offload_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 54e0f94498..b04f92607f 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -44,8 +44,10 @@ static int nft_objref_init(const struct nft_ctx *ctx, if (IS_ERR(obj)) return -ENOENT; + if (!nft_use_inc(&obj->use)) + return -EMFILE; + nft_objref_priv(expr) = obj; - obj->use++; return 0; } @@ -74,7 +76,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx, if (phase == NFT_TRANS_COMMIT) return; - obj->use--; + nft_use_dec(&obj->use); } static void nft_objref_activate(const struct nft_ctx *ctx, @@ -82,7 +84,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx, { struct nft_object *obj = nft_objref_priv(expr); - obj->use++; + nft_use_inc_restore(&obj->use); } static struct nft_expr_type nft_objref_type; From 7221a94aa6167f386638a95b4c32dfdc53087bc2 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 09:58:39 -0700 Subject: [PATCH 12/49] netfilter: nf_tables: fix spurious set element insertion failure jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit ddbd8be68941985f166f5107109a90ce13147c44 On some platforms there is a padding hole in the nft_verdict structure, between the verdict code and the chain pointer. On element insertion, if the new element clashes with an existing one and NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as the data associated with duplicated element is the same as the existing one. The data equality check uses memcmp. For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT padding area leads to spurious failure even if the verdict data is the same. This then makes the insertion fail with 'already exists' error, even though the new "key : data" matches an existing entry and userspace told the kernel that it doesn't want to receive an error indication. Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion") Signed-off-by: Florian Westphal (cherry picked from commit ddbd8be68941985f166f5107109a90ce13147c44) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a10c3d593d..9cfdcdd18c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8732,6 +8732,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { From a32ff5ba074a8e418b29cfe9b433cad9409a77f4 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 10:14:44 -0700 Subject: [PATCH 13/49] netfilter: nf_tables: don't skip expired elements during walk jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 24138933b97b055d486e8064b4a1721702442a9b There is an asymmetry between commit/abort and preparation phase if the following conditions are met: 1. set is a verdict map ("1.2.3.4 : jump foo") 2. timeouts are enabled In this case, following sequence is problematic: 1. element E in set S refers to chain C 2. userspace requests removal of set S 3. kernel does a set walk to decrement chain->use count for all elements from preparation phase 4. kernel does another set walk to remove elements from the commit phase (or another walk to do a chain->use increment for all elements from abort phase) If E has already expired in 1), it will be ignored during list walk, so its use count won't have been changed. Then, when set is culled, ->destroy callback will zap the element via nf_tables_set_elem_destroy(), but this function is only safe for elements that have been deactivated earlier from the preparation phase: lack of earlier deactivate removes the element but leaks the chain use count, which results in a WARN splat when the chain gets removed later, plus a leak of the nft_chain structure. Update pipapo_get() not to skip expired elements, otherwise flush command reports bogus ENOENT errors. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 24138933b97b055d486e8064b4a1721702442a9b) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ++++ net/netfilter/nft_set_hash.c | 2 -- net/netfilter/nft_set_pipapo.c | 18 ++++++++++++------ net/netfilter/nft_set_rbtree.c | 2 -- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9cfdcdd18c..b7a18ef448 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4754,8 +4754,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); struct nft_set_dump_args *args; + if (nft_set_elem_expired(ext)) + return 0; + args = container_of(iter, struct nft_set_dump_args, iter); return nf_tables_fill_setelem(args->skb, set, elem); } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 9457b5ab11..037a2a90e4 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -262,8 +262,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&he->ext)) - goto cont; if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 9ae3743c07..90562786e3 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -704,8 +704,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext) || - (genmask && + if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) goto next_match; @@ -739,8 +738,17 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, void *nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + struct nft_pipapo_elem *ret; + + ret = pipapo_get(net, set, (const u8 *)elem->key.val.data, + nft_genmask_cur(net)); + if (IS_ERR(ret)) + return ret; + + if (nft_set_elem_expired(&ret->ext)) + return ERR_PTR(-ENOENT); + + return ret; } /** @@ -1890,8 +1898,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, goto cont; e = f->mt[r].e; - if (nft_set_elem_expired(&e->ext)) - goto cont; elem.priv = e; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 2502793439..10883b3376 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -446,8 +446,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&rbe->ext)) - goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; From d6cf9a968b289e1bfdbe14e78042e7e78b2f9a6b Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 10:19:05 -0700 Subject: [PATCH 14/49] netfilter: nftables: rename set element data activation/deactivation functions jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit f8bb7889af58d8e74d2d61c76b1418230f1610fa Rename: - nft_set_elem_activate() to nft_set_elem_data_activate(). - nft_set_elem_deactivate() to nft_set_elem_data_deactivate(). To prepare for updates in the set element infrastructure to add support for the special catch-all element. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit f8bb7889af58d8e74d2d61c76b1418230f1610fa) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7a18ef448..53fa1bae84 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5205,8 +5205,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_set_elem_deactivate() already deals with - * the refcounting from the preparation phase. +/* Only called from commit path, nft_setelem_data_deactivate() already deals + * with the refcounting from the preparation phase. */ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, void *elem) @@ -5738,9 +5738,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) } } -static void nft_set_elem_activate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -5750,9 +5750,9 @@ static void nft_set_elem_activate(const struct net *net, nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_set_elem_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -5839,7 +5839,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; - nft_set_elem_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -5875,7 +5875,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, } set->ndeact++; - nft_set_elem_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -8333,7 +8333,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_set_elem_activate(net, te->set, &te->elem); + nft_setelem_data_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; From c720248e6a2b6c9a813b9d37969261a441d4b3d8 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 14:32:55 -0700 Subject: [PATCH 15/49] netfilter: nftables: add nft_pernet() helper function jira VULN-429 pre-cve CVE-2023-4244 commit-author Pablo Neira Ayuso commit d59d2f82f984df44b31c5d7837fc2f62268b7571 upstream-diff So many conflicts when trying to cherry pick this but they're all very similar and didn't have much trouble picking them out. As per previous commits in this series I've used 4.18.0-534 as the source of truth when resolving conflicts. Consolidate call to net_generic(net, nf_tables_net_id) in this wrapper function. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit d59d2f82f984df44b31c5d7837fc2f62268b7571) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 13 +++++++++++++ net/netfilter/nf_tables_api.c | 20 ++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2865638e0a..98f9f77697 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -13,6 +13,7 @@ #include #include #include +#include struct module; @@ -1561,4 +1562,16 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...); #else static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; } #endif + +struct nftables_pernet { + unsigned int gc_seq; +}; + +extern unsigned int nf_tables_net_id; + +static inline struct nftables_pernet *nft_pernet(const struct net *net) +{ + return net_generic(net, nf_tables_net_id); +} + #endif /* _NET_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 53fa1bae84..e972af05d6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -28,6 +28,9 @@ #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +unsigned int nf_tables_net_id __read_mostly; +EXPORT_SYMBOL_GPL(nf_tables_net_id); + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -3593,8 +3596,8 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { - struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); + struct nft_trans *trans; list_for_each_entry(trans, &net->nft.commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { @@ -3837,8 +3840,8 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { - u32 portid = ctx->portid; struct sk_buff *skb; + u32 portid = ctx->portid; u16 flags = 0; int err; @@ -4786,7 +4789,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = READ_ONCE(net->nft.base_seq); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &net->nft.tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -7964,6 +7967,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; @@ -8398,7 +8402,7 @@ static void nf_tables_cleanup(struct net *net) static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { - int ret = __nf_tables_abort(net, action); + struct nftables_pernet *nft_net = nft_pernet(net); mutex_unlock(&net->nft_commit_mutex); @@ -8407,6 +8411,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = nft_pernet(net); bool genid_ok; mutex_lock(&net->nft_commit_mutex); @@ -9019,6 +9024,8 @@ static void __nft_release_tables(struct net *net) static int __net_init nf_tables_init_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft_module_list); @@ -9039,11 +9046,16 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + mutex_lock(&net->nft_commit_mutex); + if (!list_empty(&net->nft.commit_list) || !list_empty(&net->nft_module_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); + __nft_release_tables(net); + mutex_unlock(&net->nft_commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft_module_list)); From b6ecc9d01a2748ed6337a6ffde5f69c25183b2c7 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 16:42:27 -0700 Subject: [PATCH 16/49] netfilter: nf_tables: GC transaction API to avoid race with control plane jira VULN-429 cve CVE-2023-4244 commit-author Pablo Neira Ayuso ' commit 5f68718b34a531a556f2f50300ead2862278da26 upstream-diff - Upstream fuzz and conflicts. Resolved by pointing to 4.18.0-534 as the source of truth. The set types rhashtable and rbtree use a GC worker to reclaim memory. From system work queue, in periodic intervals, a scan of the table is done. The major caveat here is that the nft transaction mutex is not held. This causes a race between control plane and GC when they attempt to delete the same element. We cannot grab the netlink mutex from the work queue, because the control plane has to wait for the GC work queue in case the set is to be removed, so we get following deadlock: cpu 1 cpu2 GC work transaction comes in , lock nft mutex `acquire nft mutex // BLOCKS transaction asks to remove the set set destruction calls cancel_work_sync() cancel_work_sync will now block forever, because it is waiting for the mutex the caller already owns. This patch adds a new API that deals with garbage collection in two steps: 1) Lockless GC of expired elements sets on the NFT_SET_ELEM_DEAD_BIT so they are not visible via lookup. Annotate current GC sequence in the GC transaction. Enqueue GC transaction work as soon as it is full. If ruleset is updated, then GC transaction is aborted and retried later. 2) GC work grabs the mutex. If GC sequence has changed then this GC transaction lost race with control plane, abort it as it contains stale references to objects and let GC try again later. If the ruleset is intact, then this GC transaction deactivates and removes the elements and it uses call_rcu() to destroy elements. Note that no elements are removed from GC lockless path, the _DEAD bit is set and pointers are collected. GC catchall does not remove the elements anymore too. There is a new set->dead flag that is set on to abort the GC transaction to deal with set->ops->destroy() path which removes the remaining elements in the set from commit_release, where no mutex is held. To deal with GC when mutex is held, which allows safe deactivate and removal, add sync GC API which releases the set element object via call_rcu(). This is used by rbtree and pipapo backends which also perform garbage collection from control plane path. Since element removal from sets can happen from control plane and element garbage collection/timeout, it is necessary to keep the set structure alive until all elements have been deactivated and destroyed. We cannot do a cancel_work_sync or flush_work in nft_set_destroy because its called with the transaction mutex held, but the aforementioned async work queue might be blocked on the very mutex that nft_set_destroy() callchain is sitting on. This gives us the choice of ABBA deadlock or UaF. To avoid both, add set->refs refcount_t member. The GC API can then increment the set refcount and release it once the elements have been free'd. Set backends are adapted to use the GC transaction API in a follow up patch entitled: ("netfilter: nf_tables: use gc transaction API in set backends") This is joint work with Florian Westphal. Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 62 ++++++++- net/netfilter/nf_tables_api.c | 216 ++++++++++++++++++++++++++++-- 2 files changed, 268 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 98f9f77697..480c4f8e5e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -445,6 +445,7 @@ struct nft_set_elem_expr { * * @list: table set list node * @bindings: list of set bindings + * @refs: internal refcounting for async set destruction * @table: table this set belongs to * @net: netnamespace this set belongs to * @name: name of the set @@ -474,6 +475,7 @@ struct nft_set_elem_expr { struct nft_set { struct list_head list; struct list_head bindings; + refcount_t refs; struct nft_table *table; possible_net_t net; char *name; @@ -495,7 +497,8 @@ struct nft_set { struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + dead:1, genmask:2; u8 klen; u8 dlen; @@ -1444,6 +1447,32 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) clear_bit(NFT_SET_ELEM_BUSY_BIT, word); } +#define NFT_SET_ELEM_DEAD_MASK (1 << 3) + +#if defined(__LITTLE_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT 3 +#elif defined(__BIG_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) +#else +#error +#endif + +static inline void nft_set_elem_dead(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + set_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + +static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + /** * struct nft_trans - nf_tables object update in transaction * @@ -1546,6 +1575,35 @@ struct nft_trans_flowtable { #define nft_trans_flowtable(trans) \ (((struct nft_trans_flowtable *)trans->data)->flowtable) +#define NFT_TRANS_GC_BATCHCOUNT 256 + +struct nft_trans_gc { + struct list_head list; + struct net *net; + struct nft_set *set; + u32 seq; + u8 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; +}; + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_destroy(struct nft_trans_gc *trans); + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); + +void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); + +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); @@ -1564,7 +1622,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re #endif struct nftables_pernet { - unsigned int gc_seq; + unsigned int gc_seq; }; extern unsigned int nf_tables_net_id; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e972af05d6..6e4faf8ea2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -35,7 +35,9 @@ static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); +static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); +static DEFINE_SPINLOCK(nf_tables_gc_list_lock); enum { NFT_VALIDATE_SKIP = 0, @@ -125,6 +127,9 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); +static void nft_trans_gc_work(struct work_struct *work); +static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); + static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, @@ -4280,6 +4285,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); + refcount_set(&set->refs, 1); set->table = table; write_pnet(&set->net, net); set->ops = ops; @@ -4368,6 +4374,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, return err; } +static void nft_set_put(struct nft_set *set) +{ + if (refcount_dec_and_test(&set->refs)) { + kfree(set->name); + kvfree(set); + } +} + static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { int i; @@ -4380,8 +4394,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) set->ops->destroy(set); module_put(to_set_type(set->ops)->owner); - kfree(set->name); - kvfree(set); + nft_set_put(set); } static int nf_tables_delset(struct net *net, struct sock *nlsk, @@ -5753,9 +5766,9 @@ static void nft_setelem_data_activate(const struct net *net, nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_setelem_data_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -7810,6 +7823,181 @@ static void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } +static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, + struct nft_trans_gc *trans) +{ + void **priv = trans->priv; + unsigned int i; + + for (i = 0; i < trans->count; i++) { + struct nft_set_elem elem = { + .priv = priv[i], + }; + + nft_setelem_data_deactivate(ctx->net, trans->set, &elem); + nft_setelem_remove(ctx->net, trans->set, &elem); + } +} + +void nft_trans_gc_destroy(struct nft_trans_gc *trans) +{ + nft_set_put(trans->set); + put_net(trans->net); + kfree(trans); +} + +static void nft_trans_gc_trans_free(struct rcu_head *rcu) +{ + struct nft_set_elem elem = {}; + struct nft_trans_gc *trans; + struct nft_ctx ctx = {}; + unsigned int i; + + trans = container_of(rcu, struct nft_trans_gc, rcu); + ctx.net = read_pnet(&trans->set->net); + + for (i = 0; i < trans->count; i++) { + elem.priv = trans->priv[i]; + atomic_dec(&trans->set->nelems); + + nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + } + + nft_trans_gc_destroy(trans); +} + +static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) +{ + struct nftables_pernet *nft_net; + struct nft_ctx ctx = {}; + struct net *net; + + net = trans->net; + nft_net = nft_pernet(net); + + mutex_lock(&net->nft_commit_mutex); + + /* Check for race with transaction, otherwise this batch refers to + * stale objects that might not be there anymore. Skip transaction if + * set has been destroyed from control plane transaction in case gc + * worker loses race. + */ + if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { + mutex_unlock(&net->nft_commit_mutex); + return false; + } + + ctx.net = trans->net; + ctx.table = trans->set->table; + + nft_trans_gc_setelem_remove(&ctx, trans); + mutex_unlock(&net->nft_commit_mutex); + + return true; +} + +static void nft_trans_gc_work(struct work_struct *work) +{ + struct nft_trans_gc *trans, *next; + LIST_HEAD(trans_gc_list); + + spin_lock(&nf_tables_destroy_list_lock); + list_splice_init(&nf_tables_gc_list, &trans_gc_list); + spin_unlock(&nf_tables_destroy_list_lock); + + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); + if (!nft_trans_gc_work_done(trans)) { + nft_trans_gc_destroy(trans); + continue; + } + call_rcu(&trans->rcu, nft_trans_gc_trans_free); + } +} + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp) +{ + struct net *net = read_pnet(&set->net); + struct nft_trans_gc *trans; + + trans = kzalloc(sizeof(*trans), gfp); + if (!trans) + return NULL; + + refcount_inc(&set->refs); + trans->set = set; + trans->net = get_net(net); + trans->seq = gc_seq; + + return trans; +} + +void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) +{ + trans->priv[trans->count++] = priv; +} + +static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) +{ + spin_lock(&nf_tables_gc_list_lock); + list_add_tail(&trans->list, &nf_tables_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + schedule_work(&trans_gc_work); +} + +static int nft_trans_gc_space(struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp) +{ + if (nft_trans_gc_space(gc)) + return gc; + + nft_trans_gc_queue_work(gc); + + return nft_trans_gc_alloc(gc->set, gc_seq, gfp); +} + +void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) +{ + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + nft_trans_gc_queue_work(trans); +} + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) +{ + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) + return NULL; + + if (nft_trans_gc_space(gc)) + return gc; + + call_rcu(&gc->rcu, nft_trans_gc_trans_free); + + return nft_trans_gc_alloc(gc->set, 0, gfp); +} + +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) +{ + WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); + + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + call_rcu(&trans->rcu, nft_trans_gc_trans_free); +} + static void nf_tables_module_autoload_cleanup(struct net *net) { struct nft_module_request *req, *next; @@ -8103,6 +8291,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); @@ -8127,7 +8316,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM); - te->set->ops->remove(net, te->set, &te->elem); + nft_setelem_remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; if (te->set->ops->commit && @@ -8178,6 +8367,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, net->nft.base_seq); + nf_tables_commit_release(net); return 0; @@ -8284,7 +8474,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELCHAIN: - nft_use_inc_restore(&trans->ctx.chain->use); + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; @@ -8367,6 +8557,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans)); + break; case NFT_MSG_DELFLOWTABLE: nft_use_inc_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); @@ -8411,7 +8602,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, static bool nf_tables_valid_genid(struct net *net, u32 genid) { - struct nftables_pernet *nft_net = nft_pernet(net); bool genid_ok; mutex_lock(&net->nft_commit_mutex); @@ -9033,6 +9223,7 @@ static int __net_init nf_tables_init_net(struct net *net) mutex_init(&net->nft_commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; + nft_net->gc_seq = 0; return 0; } @@ -9062,10 +9253,18 @@ static void __net_exit nf_tables_exit_net(struct net *net) WARN_ON_ONCE(!list_empty(&net->nft_notify_list)); } +static void nf_tables_exit_batch(struct list_head *net_exit_list) +{ + flush_work(&trans_gc_work); +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .exit_batch = nf_tables_exit_batch, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) @@ -9128,6 +9327,7 @@ static void __exit nf_tables_module_exit(void) nft_chain_filter_fini(); nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); From a3c94410bd3ec2e960ccf5f897ce9f1c977f7671 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 11:11:26 -0700 Subject: [PATCH 17/49] netfilter: nf_tables: adapt set backend to use GC transaction API jira VULN-429 cve CVE-2023-4244 commit-author Pablo Neira Ayuso ' commit f6c383b8c31a93752a52697f8430a71dcbc46adf upstream-diff - Upstream fuzz and conflicts. Resolved by pointing to 4.18.0-534 as the source of truth. Previous commiit 5d235d6ce75c is completely overwritten by this commit so we're not backporting it. Use the GC transaction API to replace the old and buggy gc API and the busy mark approach. No set elements are removed from async garbage collection anymore, instead the _DEAD bit is set on so the set element is not visible from lookup path anymore. Async GC enqueues transaction work that might be aborted and retried later. rbtree and pipapo set backends does not set on the _DEAD bit from the sync GC path since this runs in control plane path where mutex is held. In this case, set elements are deactivated, removed and then released via RCU callback, sync GC never fails. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit f6c383b8c31a93752a52697f8430a71dcbc46adf) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 8 +++ net/netfilter/nft_set_hash.c | 68 +++++++++++++++-------- net/netfilter/nft_set_pipapo.c | 45 ++++++++++++---- net/netfilter/nft_set_rbtree.c | 98 +++++++++++++++++++++------------- 4 files changed, 150 insertions(+), 69 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6e4faf8ea2..8ecb5b9bfc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5777,6 +5777,7 @@ void nft_setelem_data_deactivate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); } +EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate); static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) @@ -7845,6 +7846,7 @@ void nft_trans_gc_destroy(struct nft_trans_gc *trans) put_net(trans->net); kfree(trans); } +EXPORT_SYMBOL_GPL(nft_trans_gc_destroy); static void nft_trans_gc_trans_free(struct rcu_head *rcu) { @@ -7932,11 +7934,13 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, return trans; } +EXPORT_SYMBOL_GPL(nft_trans_gc_alloc); void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) { trans->priv[trans->count++] = priv; } +EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add); static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) { @@ -7962,6 +7966,7 @@ struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, return nft_trans_gc_alloc(gc->set, gc_seq, gfp); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) { @@ -7972,6 +7977,7 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) nft_trans_gc_queue_work(trans); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done); struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { @@ -7985,6 +7991,7 @@ struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) return nft_trans_gc_alloc(gc->set, 0, gfp); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) { @@ -7997,6 +8004,7 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done); static void nf_tables_module_autoload_cleanup(struct net *net) { diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 037a2a90e4..e8d10bba30 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -62,6 +62,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; + if (nft_set_elem_is_dead(&he->ext)) + return 1; if (nft_set_elem_expired(&he->ext)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) @@ -190,7 +192,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, struct nft_rhash_elem *he = elem->priv; nft_set_elem_change_active(net, set, &he->ext); - nft_set_elem_clear_busy(&he->ext); } static bool nft_rhash_flush(const struct net *net, @@ -198,12 +199,9 @@ static bool nft_rhash_flush(const struct net *net, { struct nft_rhash_elem *he = priv; - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) { - nft_set_elem_change_active(net, set, &he->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &he->ext); + + return true; } static void *nft_rhash_deactivate(const struct net *net, @@ -220,9 +218,8 @@ static void *nft_rhash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); - if (he != NULL && - !nft_rhash_flush(net, set, he)) - he = NULL; + if (he) + nft_set_elem_change_active(net, set, &he->ext); rcu_read_unlock(); @@ -296,25 +293,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, static void nft_rhash_gc(struct work_struct *work) { + struct nftables_pernet *nft_net; struct nft_set *set; struct nft_rhash_elem *he; struct nft_rhash *priv; - struct nft_set_gc_batch *gcb = NULL; struct rhashtable_iter hti; + struct nft_trans_gc *gc; + struct net *net; + u32 gc_seq; priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) - break; + if (PTR_ERR(he) != -EAGAIN) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } continue; } + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + + if (nft_set_elem_is_dead(&he->ext)) + goto dead_elem; + if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) && nft_rhash_expr_needs_gc_run(set, &he->ext)) goto needs_gc_run; @@ -322,20 +342,23 @@ static void nft_rhash_gc(struct work_struct *work) if (!nft_set_elem_expired(&he->ext)) continue; needs_gc_run: - if (nft_set_elem_mark_busy(&he->ext)) - continue; + nft_set_elem_dead(&he->ext); +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb == NULL) - break; - rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, he); + nft_trans_gc_elem_add(gc, he); } + +try_later: rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); - nft_set_gc_batch_complete(gcb); + if (gc) + nft_trans_gc_queue_async_done(gc); + +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } @@ -386,7 +409,6 @@ static void nft_rhash_destroy(const struct nft_set *set) struct nft_rhash *priv = nft_set_priv(set); cancel_delayed_work_sync(&priv->gc_work); - rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)set); } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 90562786e3..b713811282 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1436,20 +1436,38 @@ static void pipapo_drop(struct nft_pipapo_match *m, } } +static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, + struct nft_pipapo_elem *e) + +{ + struct nft_set_elem elem = { + .priv = e, + }; + + nft_setelem_data_deactivate(net, set, &elem); +} + /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) +static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) { + struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); int rules_f0, first_rule = 0; + struct nft_pipapo_elem *e; + struct nft_trans_gc *gc; + + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (!gc) + return; while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; struct nft_pipapo_field *f; - struct nft_pipapo_elem *e; int i, start, rules_fx; start = first_rule; @@ -1469,13 +1487,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) f--; i--; e = f->mt[rulemap[i].to].e; - if (nft_set_elem_expired(&e->ext) && - !nft_set_elem_mark_busy(&e->ext)) { + + /* synchronous gc never fails, there is no need to set on + * NFT_SET_ELEM_DEAD_BIT. + */ + if (nft_set_elem_expired(&e->ext)) { priv->dirty = true; - pipapo_drop(m, rulemap); - rcu_barrier(); - nft_set_elem_destroy(set, e, true); + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (!gc) + break; + + nft_pipapo_gc_deactivate(net, set, e); + pipapo_drop(m, rulemap); + nft_trans_gc_elem_add(gc, e); /* And check again current first rule, which is now the * first we haven't checked. @@ -1485,7 +1510,10 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) } } - priv->last_gc = jiffies; + if (gc) { + nft_trans_gc_queue_sync_done(gc); + priv->last_gc = jiffies; + } } /** @@ -1607,7 +1635,6 @@ static void nft_pipapo_activate(const struct net *net, return; nft_set_elem_change_active(net, set, &e->ext); - nft_set_elem_clear_busy(&e->ext); } /** diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 10883b3376..59c3bcc6fc 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -47,6 +47,12 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; } +static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) +{ + return nft_set_elem_expired(&rbe->ext) || + nft_set_elem_is_dead(&rbe->ext); +} + static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext, unsigned int seq) @@ -83,7 +89,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set continue; } - if (nft_set_elem_expired(&rbe->ext)) + if (nft_rbtree_elem_expired(rbe)) return false; if (nft_rbtree_interval_end(rbe)) { @@ -101,7 +107,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && - !nft_set_elem_expired(&interval->ext) && + !nft_rbtree_elem_expired(interval) && nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; @@ -376,7 +382,6 @@ static void nft_rbtree_activate(const struct net *net, struct nft_rbtree_elem *rbe = elem->priv; nft_set_elem_change_active(net, set, &rbe->ext); - nft_set_elem_clear_busy(&rbe->ext); } static bool nft_rbtree_flush(const struct net *net, @@ -384,12 +389,9 @@ static bool nft_rbtree_flush(const struct net *net, { struct nft_rbtree_elem *rbe = priv; - if (!nft_set_elem_mark_busy(&rbe->ext) || - !nft_is_active(net, &rbe->ext)) { - nft_set_elem_change_active(net, set, &rbe->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &rbe->ext); + + return true; } static void *nft_rbtree_deactivate(const struct net *net, @@ -464,58 +466,80 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, static void nft_rbtree_gc(struct work_struct *work) { - struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; - struct nft_set_gc_batch *gcb = NULL; + struct nft_rbtree_elem *rbe, *rbe_end = NULL; + struct nftables_pernet *nft_net; struct nft_rbtree *priv; + struct nft_trans_gc *gc; struct rb_node *node; struct nft_set *set; + unsigned int gc_seq; + struct net *net; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (nft_set_elem_is_dead(&rbe->ext)) + goto dead_elem; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + nft_set_elem_dead(&rbe->ext); + + if (!rbe_end) continue; - if (rbe_prev) { - rb_erase(&rbe_prev->node, &priv->root); - rbe_prev = NULL; - } - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (!gcb) - break; - - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - rbe_prev = rbe; - - if (rbe_end) { - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe_end); - rb_erase(&rbe_end->node, &priv->root); - rbe_end = NULL; - } - node = rb_next(node); - if (!node) - break; + nft_set_elem_dead(&rbe_end->ext); + + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; + + nft_trans_gc_elem_add(gc, rbe_end); + rbe_end = NULL; +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; + + nft_trans_gc_elem_add(gc, rbe); } - if (rbe_prev) - rb_erase(&rbe_prev->node, &priv->root); + +try_later: write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); - nft_set_gc_batch_complete(gcb); - + if (gc) + nft_trans_gc_queue_async_done(gc); +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } From 3ca5985a6c32711210c688de5e97c79cfdf5248f Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 13:14:39 -0700 Subject: [PATCH 18/49] netfilter: nf_tables: remove busy mark and gc batch API jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 upstream-diff cherry-pick occassionally pulls in big blobs of unrelated crap. I had to excise significant portions of code in the process of resolving the conflicts. As per usual in this netfilter series I have relied on 4.18.0-534 code as a source of truth. Ditch it, it has been replace it by the GC transaction API and it has no clients anymore. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 97 +------------------------------ net/netfilter/nf_tables_api.c | 27 +-------- 2 files changed, 4 insertions(+), 120 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 480c4f8e5e..e82111eda8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -740,62 +740,6 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); -/** - * struct nft_set_gc_batch_head - nf_tables set garbage collection batch - * - * @rcu: rcu head - * @set: set the elements belong to - * @cnt: count of elements - */ -struct nft_set_gc_batch_head { - struct rcu_head rcu; - const struct nft_set *set; - unsigned int cnt; -}; - -#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ - sizeof(struct nft_set_gc_batch_head)) / \ - sizeof(void *)) - -/** - * struct nft_set_gc_batch - nf_tables set garbage collection batch - * - * @head: GC batch head - * @elems: garbage collection elements - */ -struct nft_set_gc_batch { - struct nft_set_gc_batch_head head; - void *elems[NFT_SET_GC_BATCH_SIZE]; -}; - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp); -void nft_set_gc_batch_release(struct rcu_head *rcu); - -static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) -{ - if (gcb != NULL) - call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); -} - -static inline struct nft_set_gc_batch * -nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, - gfp_t gfp) -{ - if (gcb != NULL) { - if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) - return gcb; - nft_set_gc_batch_complete(gcb); - } - return nft_set_gc_batch_alloc(set, gfp); -} - -static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, - void *elem) -{ - gcb->elems[gcb->head.cnt++] = elem; -} - struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type @@ -1412,47 +1356,12 @@ static inline void nft_set_elem_change_active(const struct net *net, ext->genmask ^= nft_genmask_next(net); } -/* - * We use a free bit in the genmask field to indicate the element - * is busy, meaning it is currently being processed either by - * the netlink API or GC. - * - * Even though the genmask is only a single byte wide, this works - * because the extension structure if fully constant once initialized, - * so there are no non-atomic write accesses unless it is already - * marked busy. - */ -#define NFT_SET_ELEM_BUSY_MASK (1 << 2) - -#if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT 2 -#elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) -#else -#error -#endif - -static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) -{ - unsigned long *word = (unsigned long *)ext; - - BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); - return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); -} - -static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) -{ - unsigned long *word = (unsigned long *)ext; - - clear_bit(NFT_SET_ELEM_BUSY_BIT, word); -} - -#define NFT_SET_ELEM_DEAD_MASK (1 << 3) +#define NFT_SET_ELEM_DEAD_MASK (1 << 2) #if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_DEAD_BIT 3 +#define NFT_SET_ELEM_DEAD_BIT 2 #elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) #else #error #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8ecb5b9bfc..dc8ea91abd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5630,7 +5630,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_elem_expr; } - ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; + ext->genmask = nft_genmask_cur(ctx->net); err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { @@ -5947,31 +5947,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, return err; } -void nft_set_gc_batch_release(struct rcu_head *rcu) -{ - struct nft_set_gc_batch *gcb; - unsigned int i; - - gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); - for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); - kfree(gcb); -} -EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp) -{ - struct nft_set_gc_batch *gcb; - - gcb = kzalloc(sizeof(*gcb), gfp); - if (gcb == NULL) - return gcb; - gcb->head.set = set; - return gcb; -} -EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); - /* * Stateful objects */ From 2dbe733acd823ff6799ea370b831a9486399efa8 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 13:29:08 -0700 Subject: [PATCH 19/49] netfilter: nf_tables: fix false-positive lockdep splat jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit b9f052dc68f69dac89fe1e24693354c033daa091 upstream-diff Had to synch to the use of inline from the 4.18.0-534 and also found an upstream diff for lockdep_is_held that I also matched to the 4.18.0-534 kernel code. ->abort invocation may cause splat on debug kernels: WARNING: suspicious RCU usage net/netfilter/nft_set_pipapo.c:1697 suspicious rcu_dereference_check() usage! [..] rcu_scheduler_active = 2, debug_locks = 1 1 lock held by nft/133554: [..] (nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid [..] lockdep_rcu_suspicious+0x1ad/0x260 nft_pipapo_abort+0x145/0x180 __nf_tables_abort+0x5359/0x63d0 nf_tables_abort+0x24/0x40 nfnetlink_rcv+0x1a0a/0x22c0 netlink_unicast+0x73c/0x900 netlink_sendmsg+0x7f0/0xc20 ____sys_sendmsg+0x48d/0x760 Transaction mutex is held, so parallel updates are not possible. Switch to _protected and check mutex is held for lockdep enabled builds. Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") Signed-off-by: Florian Westphal (cherry picked from commit b9f052dc68f69dac89fe1e24693354c033daa091) Signed-off-by: Greg Rose --- net/netfilter/nft_set_pipapo.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index b713811282..113771935e 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1593,6 +1593,17 @@ static void nft_pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static bool inline nft_pipapo_transaction_mutex_held(const struct nft_set *set) +{ +#ifdef CONFIG_PROVE_LOCKING + const struct net *net = read_pnet(&set->net); + + return lockdep_is_held(&net->nft_commit_mutex); +#else + return true; +#endif +} + static void nft_pipapo_abort(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); @@ -1601,7 +1612,7 @@ static void nft_pipapo_abort(const struct nft_set *set) if (!priv->dirty) return; - m = rcu_dereference(priv->match); + m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); new_clone = pipapo_clone(m); if (IS_ERR(new_clone)) From ae0997dbf7862f888619a532d78f0bdad91f50cb Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:28:04 -0700 Subject: [PATCH 20/49] netfilter: nf_tables: fix kdoc warnings after gc rework jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 08713cb006b6f07434f276c5ee214fb20c7fd965 Jakub Kicinski says: We've got some new kdoc warnings here: net/netfilter/nft_set_pipapo.c:1557: warning: Function parameter or member '_set' not described in 'pipapo_gc' net/netfilter/nft_set_pipapo.c:1557: warning: Excess function parameter 'set' description in 'pipapo_gc' include/net/netfilter/nf_tables.h:577: warning: Function parameter or member 'dead' not described in 'nft_set' Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20230810104638.746e46f1@kernel.org/ Signed-off-by: Florian Westphal (cherry picked from commit 08713cb006b6f07434f276c5ee214fb20c7fd965) Signed-off-by: Greg Rose (cherry picked from commit ddcae6925219c35588313d4f84e103e8a885e457) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nft_set_pipapo.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e82111eda8..7a4dfb1093 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -467,6 +467,7 @@ struct nft_set_elem_expr { * @expr: stateful expression * @ops: set ops * @flags: set flags + * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 113771935e..538f710424 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1449,7 +1449,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @set: nftables API set representation + * @_set: nftables API set representation * @m: Matching data */ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) From ef98a427b8e14c619966fe084c95f02476484994 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 13:35:39 -0700 Subject: [PATCH 21/49] netfilter: nf_tables: don't fail inserts if duplicate has expired jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 7845914f45f066497ac75b30c50dbc735e84e884 nftables selftests fail: run-tests.sh testcases/sets/0044interval_overlap_0 Expected: 0-2 . 0-3, got: W: [FAILED] ./testcases/sets/0044interval_overlap_0: got 1 Insertion must ignore duplicate but expired entries. Moreover, there is a strange asymmetry in nft_pipapo_activate: It refetches the current element, whereas the other ->activate callbacks (bitmap, hash, rhash, rbtree) use elem->priv. Same for .remove: other set implementations take elem->priv, nft_pipapo_remove fetches elem->priv, then does a relookup, remove this. I suspect this was the reason for the change that prompted the removal of the expired check in pipapo_get() in the first place, but skipping exired elements there makes no sense to me, this helper is used for normal get requests, insertions (duplicate check) and deactivate callback. In first two cases expired elements must be skipped. For ->deactivate(), this gets called for DELSETELEM, so it seems to me that expired elements should be skipped as well, i.e. delete request should fail with -ENOENT error. Fixes: 24138933b97b ("netfilter: nf_tables: don't skip expired elements during walk") Signed-off-by: Florian Westphal (cherry picked from commit 7845914f45f066497ac75b30c50dbc735e84e884) Signed-off-by: Greg Rose --- net/netfilter/nft_set_pipapo.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 538f710424..743f553278 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -704,6 +704,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { + if (nft_set_elem_expired(&f->mt[b].e->ext)) + goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) goto next_match; @@ -738,17 +740,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, void *nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - struct nft_pipapo_elem *ret; - - ret = pipapo_get(net, set, (const u8 *)elem->key.val.data, + return pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); - if (IS_ERR(ret)) - return ret; - - if (nft_set_elem_expired(&ret->ext)) - return ERR_PTR(-ENOENT); - - return ret; } /** @@ -1639,11 +1632,7 @@ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_pipapo_elem *e; - - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0); - if (IS_ERR(e)) - return; + struct nft_pipapo_elem *e = elem->priv; nft_set_elem_change_active(net, set, &e->ext); } @@ -1853,10 +1842,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, data = (const u8 *)nft_set_ext_key(&e->ext); - e = pipapo_get(net, set, data, 0); - if (IS_ERR(e)) - return; - while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *match_start, *match_end; From 9ea9c183b77233f897fac2029f6b20a3d28b68cb Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 14:11:25 -0700 Subject: [PATCH 22/49] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc upstream-diff There's a lot of fuzz and code differences - resolved in favor of the 534 release code. Netlink event path is missing a synchronization point with GC transactions. Add GC sequence number update to netns release path and netlink event path, any GC transaction losing race will be discarded. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dc8ea91abd..72e59619c5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8136,10 +8136,27 @@ static void nft_set_commit_update(struct list_head *set_update_list) } } +static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) +{ + unsigned int gc_seq; + + /* Bump gc counter, it becomes odd, this is the busy mark. */ + gc_seq = READ_ONCE(nft_net->gc_seq); + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + + return gc_seq; +} + +static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) +{ + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; @@ -8195,6 +8212,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) WRITE_ONCE(net->nft.base_seq, base_seq); + gc_seq = nft_gc_seq_begin(nft_net); + /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); @@ -8351,6 +8370,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, net->nft.base_seq); + nft_gc_seq_end(nft_net, gc_seq); nf_tables_commit_release(net); return 0; @@ -8498,7 +8518,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(net, te->set, &te->elem); + nft_setelem_remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); if (te->set->ops->abort && @@ -9221,15 +9241,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); + unsigned int gc_seq; mutex_lock(&net->nft_commit_mutex); + gc_seq = nft_gc_seq_begin(nft_net); + if (!list_empty(&net->nft.commit_list) || - !list_empty(&net->nft_module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + !list_empty(&net->nft_module_list)) + __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); + nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&net->nft_commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft_module_list)); From c84dcb7ed8a0c8103f8c79a34e46f387b3d5b326 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 14:43:36 -0700 Subject: [PATCH 23/49] netfilter: nf_tables: GC transaction race with netns dismantle jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 02c6c24402bf1c1e986899c14ba22a10b510916b Use maybe_get_net() since GC workqueue might race with netns exit path. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit 02c6c24402bf1c1e986899c14ba22a10b510916b) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 72e59619c5..12b8ceebdc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7902,9 +7902,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, if (!trans) return NULL; + trans->net = maybe_get_net(net); + if (!trans->net) { + kfree(trans); + return NULL; + } + refcount_inc(&set->refs); trans->set = set; - trans->net = get_net(net); trans->seq = gc_seq; return trans; @@ -8161,7 +8166,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; - unsigned int base_seq; LIST_HEAD(adl); int err; From 9e3b392ceb24a93ab6741da11b7e73ae207bb18e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 29 Oct 2024 15:32:54 -0700 Subject: [PATCH 24/49] netfilter: nf_tables: GC transaction race with abort path jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 720344340fb9be2765bbaab7b292ece0a4570eae upstream-diff Some minor differences due to pernet goo - not important. Abort path is missing a synchronization point with GC transactions. Add GC sequence number hence any GC transaction losing race will be discarded. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry-picked from commit 720344340fb9be2765bbaab7b292ece0a4570eae) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 12b8ceebdc..70e9576bf5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8601,6 +8601,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); + unsigned int gc_seq; + int ret; + + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&net->nft_commit_mutex); From 8cfad08c46e08d26f120f98bed02be299e85d717 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:48:15 -0700 Subject: [PATCH 25/49] netfilter: nf_tables: use correct lock to protect gc_list jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e Use nf_tables_gc_list_lock spinlock, not nf_tables_destroy_list_lock to protect the gc list. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 70e9576bf5..75c7770c2e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7878,9 +7878,9 @@ static void nft_trans_gc_work(struct work_struct *work) struct nft_trans_gc *trans, *next; LIST_HEAD(trans_gc_list); - spin_lock(&nf_tables_destroy_list_lock); + spin_lock(&nf_tables_gc_list_lock); list_splice_init(&nf_tables_gc_list, &trans_gc_list); - spin_unlock(&nf_tables_destroy_list_lock); + spin_unlock(&nf_tables_gc_list_lock); list_for_each_entry_safe(trans, next, &trans_gc_list, list) { list_del(&trans->list); From 7ca4ba0505171a5b245614b5a367d744deb97212 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:53:32 -0700 Subject: [PATCH 26/49] netfilter: nf_tables: fix out of memory error handling jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 5e1be4cdc98c989d5387ce94ff15b5ad06a5b681 upstream-diff Using the 4.18.0-534 code as an example. Several instances of pipapo_resize() don't propagate allocation failures, this causes a crash when fault injection is enabled for gfp_kernel slabs. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio (cherry picked from commit 5e1be4cdc98c989d5387ce94ff15b5ad06a5b681) Signed-off-by: Greg Rose --- net/netfilter/nft_set_pipapo.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 743f553278..0c4983ad96 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -850,12 +850,14 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group, static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret; + int rule = f->rules, group, ret; - ret = pipapo_resize(f, f->rules - 1, f->rules); + ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) return ret; + f->rules++; + for (group = 0; group < f->groups; group++) { int i, v; u8 mask; @@ -995,7 +997,9 @@ static int pipapo_expand(struct nft_pipapo_field *f, step++; if (step >= len) { if (!masks) { - pipapo_insert(f, base, 0); + err = pipapo_insert(f, base, 0); + if (err < 0) + return err; masks = 1; } goto out; @@ -1151,6 +1155,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, f->groups * NFT_PIPAPO_GROUP_BITS); } + if (ret < 0) + return ret; + if (f->bsize > bsize_max) bsize_max = f->bsize; From dc9592f7604bac2a4c54fe999f5dacc5d0817250 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:59:06 -0700 Subject: [PATCH 27/49] netfilter: nf_tables: defer gc run if previous batch is still pending jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 8e51830e29e12670b4c10df070a4ea4c9593e961 Don't queue more gc work, else we may queue the same elements multiple times. If an element is flagged as dead, this can mean that either the previous gc request was invalidated/discarded by a transaction or that the previous request is still pending in the system work queue. The latter will happen if the gc interval is set to a very low value, e.g. 1ms, and system work queue is backlogged. The sets refcount is 1 if no previous gc requeusts are queued, so add a helper for this and skip gc run if old requests are pending. Add a helper for this and skip the gc run in this case. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Florian Westphal Reviewed-by: Pablo Neira Ayuso (cherry picked from commit 8e51830e29e12670b4c10df070a4ea4c9593e961) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 5 +++++ net/netfilter/nft_set_hash.c | 3 +++ net/netfilter/nft_set_rbtree.c | 3 +++ 3 files changed, 11 insertions(+) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7a4dfb1093..f8983f8f37 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -519,6 +519,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline bool nft_set_gc_is_pending(const struct nft_set *s) +{ + return refcount_read(&s->refs) != 1; +} + static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e8d10bba30..dd915a16a3 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -308,6 +308,9 @@ static void nft_rhash_gc(struct work_struct *work) nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); + if (nft_set_gc_is_pending(set)) + goto done; + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 59c3bcc6fc..3d1e8f0d7a 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -481,6 +481,9 @@ static void nft_rbtree_gc(struct work_struct *work) nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); + if (nft_set_gc_is_pending(set)) + goto done; + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; From f2de13ad4f6e8fe61d90b5600e01e88d4bd5e2c3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 17:03:56 -0700 Subject: [PATCH 28/49] netfilter: nf_tables: fix nft_trans type confusion jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit e3c361b8acd636f5fe80c02849ca175201edf10c upstream-diff - Some cruft in nft_rule_lookup_byid() - resolved by using branch 4.18.0-534 as the source of truth. nft_trans_FOO objects all share a common nft_trans base structure, but trailing fields depend on the real object size. Access is only safe after trans->msg_type check. Check for rule type first. Found by code inspection. Fixes: 1a94e38d254b ("netfilter: nf_tables: add NFTA_RULE_ID attribute") Signed-off-by: Florian Westphal (cherry picked from commit e3c361b8acd636f5fe80c02849ca175201edf10c) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 75c7770c2e..390167eaab 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3330,12 +3330,10 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &net->nft.commit_list, list) { - struct nft_rule *rule = nft_trans_rule(trans); - if (trans->msg_type == NFT_MSG_NEWRULE && trans->ctx.chain == chain && id == nft_trans_rule_id(trans)) - return rule; + return nft_trans_rule(trans); } return ERR_PTR(-ENOENT); } From 9d0955cc2eb7b94a8a2c1f4467fa924cd75f79bb Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:22:36 -0700 Subject: [PATCH 29/49] netfilter: nf_tables: disallow element removal on anonymous sets jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 23a3bfd4ba7acd36abf52b78605f61b21bdac216 Anonymous sets need to be populated once at creation and then they are bound to rule since 938154b93be8 ("netfilter: nf_tables: reject unbound anonymous set before commit phase"), otherwise transaction reports EINVAL. Userspace does not need to delete elements of anonymous sets that are not yet bound, reject this with EOPNOTSUPP. From flush command path, skip anonymous sets, they are expected to be bound already. Otherwise, EINVAL is hit at the end of this transaction for unbound sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 23a3bfd4ba7acd36abf52b78605f61b21bdac216) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 390167eaab..3045a1f1c4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1139,8 +1139,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -5921,8 +5920,10 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && - (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { From 10c7204e3d9c62afd8788800fb1cb1fa20ecee7b Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:28:42 -0700 Subject: [PATCH 30/49] netfilter: nftables: update table flags from the commit phase jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 0ce7cf4127f14078ca598ba9700d813178a59409 Do not update table flags from the preparation phase. Store the flags update into the transaction, then update the flags from the commit phase. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 0ce7cf4127f14078ca598ba9700d813178a59409) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 9 ++++++--- net/netfilter/nf_tables_api.c | 31 ++++++++++++++++--------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f8983f8f37..a578cf9fbf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1449,13 +1449,16 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - bool enable; + u8 state; + u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_enable(trans) \ - (((struct nft_trans_table *)trans->data)->enable) +#define nft_trans_table_state(trans) \ + (((struct nft_trans_table *)trans->data)->state) +#define nft_trans_table_flags(trans) \ + (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3045a1f1c4..9c0ef774b2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -945,6 +945,12 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) nft_table_disable(net, table, 0); } +enum { + NFT_TABLE_STATE_UNCHANGED = 0, + NFT_TABLE_STATE_DORMANT, + NFT_TABLE_STATE_WAKEUP +}; + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -968,19 +974,17 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_enable(trans) = false; + nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) - nft_trans_table_enable(trans) = true; - else - ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; } if (ret < 0) goto err; + nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -8226,11 +8230,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + + trans->ctx.table->flags = nft_trans_table_flags(trans); } else { nft_clear(net, trans->ctx.table); } @@ -8452,11 +8455,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + nf_tables_table_disable(net, trans->ctx.table); + nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); From ca0350260ed99660173fb98c4f26993f2e8978e4 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:39:50 -0700 Subject: [PATCH 31/49] netfilter: nf_tables: fix table flag updates jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 179d9ba5559a756f4322583388b3213fe4e391b0 upstream-diff Again some cruft around an upstream commit that Red Hat did not take - using 4.18.0-534 as the source of truth for the commit. The dormant flag need to be updated from the preparation phase, otherwise, two consecutive requests to dorm a table in the same batch might try to remove the same hooks twice, resulting in the following warning: hook not found, pf 3 num 0 WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 Modules linked in: CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") to restore the previous behaviour. However, there is still another problem: A batch containing a series of dorm-wakeup-dorm table and vice-versa also trigger the warning above since hook unregistration happens from the preparation phase, while hook registration occurs from the commit phase. To fix this problem, this patch adds two internal flags to annotate the original dormant flag status which are __NFT_TABLE_F_WAS_DORMANT and __NFT_TABLE_F_WAS_AWAKEN, to restore it from the abort path. The __NFT_TABLE_F_UPDATE bitmask allows to handle the dormant flag update with one single transaction. Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 179d9ba5559a756f4322583388b3213fe4e391b0) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 6 --- include/uapi/linux/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 59 ++++++++++++++++-------- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a578cf9fbf..b658ac8fb8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1449,16 +1449,10 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - u8 state; - u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_state(trans) \ - (((struct nft_trans_table *)trans->data)->state) -#define nft_trans_table_flags(trans) \ - (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 3e9dcd1da6..a59f04845b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -164,6 +164,7 @@ enum nft_hook_attributes { enum nft_table_flags { NFT_TABLE_F_DORMANT = 0x1, }; +#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT) /** * enum nft_table_attributes - nf_tables table netlink attributes diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9c0ef774b2..30519b8bec 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -739,7 +739,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, + htonl(table->flags & NFT_TABLE_F_MASK)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) @@ -942,20 +943,22 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) static void nf_tables_table_disable(struct net *net, struct nft_table *table) { + table->flags &= ~NFT_TABLE_F_DORMANT; nft_table_disable(net, table, 0); + table->flags |= NFT_TABLE_F_DORMANT; } -enum { - NFT_TABLE_STATE_UNCHANGED = 0, - NFT_TABLE_STATE_DORMANT, - NFT_TABLE_STATE_WAKEUP -}; +#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) +#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) +#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; u32 flags; - int ret = 0; + int ret; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; @@ -974,21 +977,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; + ctx->table->flags |= NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) + ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) - nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { + ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret < 0) + goto err_register_hooks; + + ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; + } } - if (ret < 0) - goto err; - nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; -err: + +err_register_hooks: nft_trans_destroy(trans); return ret; } @@ -8230,10 +8239,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags = nft_trans_table_flags(trans); + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; } else { nft_clear(net, trans->ctx.table); } @@ -8455,9 +8468,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { nf_tables_table_disable(net, trans->ctx.table); - + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + } + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); From 0bbef0380016e7c09149257a97473db197ecac7d Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:44:56 -0700 Subject: [PATCH 32/49] netfilter: nf_tables: disable toggling dormant table state more than once jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 upstream-diff Onced again cherry-pick pulls in unrelated cruft, the patch itself is fine - as per usual the source of truth is 4.18.0-534 nft -f -< Cc: Bing-Jhong Billy Jheng Cc: info@starlabs.sg Signed-off-by: Florian Westphal (cherry picked from commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 30519b8bec..d6420b54aa 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -970,6 +970,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; + /* No dormant off/on/off/on games in single transaction */ + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) From 92adac9d43b659710e116ef7336afe239ee5408e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:57:05 -0700 Subject: [PATCH 33/49] netfilter: nf_tables: fix memleak when more than 255 elements expired jira VULN-597 cve CVE-2023-52581 commit-author Florian Westphal commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream-diff some cruft around GPL symbol exports When more than 255 elements expired we're supposed to switch to a new gc container structure. This never happens: u8 type will wrap before reaching the boundary and nft_trans_gc_space() always returns true. This means we recycle the initial gc container structure and lose track of the elements that came before. While at it, don't deref 'gc' after we've passed it to call_rcu. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit cf5000a7787cbc10341091d37245a42c119d26c5) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b658ac8fb8..e6d1d28352 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1494,7 +1494,7 @@ struct nft_trans_gc { struct net *net; struct nft_set *set; u32 seq; - u8 count; + u16 count; void *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d6420b54aa..090c9388d1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7955,12 +7955,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans) struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { + struct nft_set *set; + if (nft_trans_gc_space(gc)) return gc; + set = gc->set; nft_trans_gc_queue_work(gc); - return nft_trans_gc_alloc(gc->set, gc_seq, gfp); + return nft_trans_gc_alloc(set, gc_seq, gfp); } EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); @@ -7977,15 +7980,18 @@ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done); struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { + struct nft_set *set; + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) return NULL; if (nft_trans_gc_space(gc)) return gc; + set = gc->set; call_rcu(&gc->rcu, nft_trans_gc_trans_free); - return nft_trans_gc_alloc(gc->set, 0, gfp); + return nft_trans_gc_alloc(set, 0, gfp); } EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); From 2d7983edb4729c70ceab87ca4dddb5194ced9805 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Sat, 26 Oct 2024 11:00:57 -0700 Subject: [PATCH 34/49] netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout jira VULN-835 cve CVE-2024-26643 commit-author Pablo Neira Ayuso commit 552705a3650bbf46a22b1adedc1b04181490fc36 While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 552705a3650bbf46a22b1adedc1b04181490fc36) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 090c9388d1..d1b4673404 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4517,6 +4517,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); From 4227fa3bcc204dd63d77c638d0cf85c3c9e6b8f6 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 31 Oct 2024 16:11:36 -0700 Subject: [PATCH 35/49] netfilter: nf_tables: set backend .flush always succeeds jira VULN-835 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 6509a2e410c3cb36c78a0a85c6102debe171337e upstream-diff - A conflict in nft_pipapo_flush resolved by favoring the 4.18.0-0-534 tagged code. .flush is always successful since this results from iterating over the set elements to toggle mark the element as inactive in the next generation. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 6509a2e410c3cb36c78a0a85c6102debe171337e) Signed-off-by: Greg Rose Conflicts: net/netfilter/nft_set_pipapo.c --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 9 +-------- net/netfilter/nft_set_bitmap.c | 4 +--- net/netfilter/nft_set_hash.c | 7 ++----- net/netfilter/nft_set_pipapo.c | 5 ++--- net/netfilter/nft_set_rbtree.c | 4 +--- 6 files changed, 8 insertions(+), 23 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6d1d28352..6f5b0acc79 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -375,7 +375,7 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*flush)(const struct net *net, + void (*flush)(const struct net *net, const struct nft_set *set, void *priv); void (*remove)(const struct net *net, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d1b4673404..d9346fab34 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5894,17 +5894,13 @@ static int nft_flush_set(const struct nft_ctx *ctx, struct nft_set_elem *elem) { struct nft_trans *trans; - int err; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, sizeof(struct nft_trans_elem), GFP_ATOMIC); if (!trans) return -ENOMEM; - if (!set->ops->flush(ctx->net, set, elem->priv)) { - err = -ENOENT; - goto err1; - } + set->ops->flush(ctx->net, set, elem->priv); set->ndeact++; nft_setelem_data_deactivate(ctx->net, set, elem); @@ -5913,9 +5909,6 @@ static int nft_flush_set(const struct nft_ctx *ctx, list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err1: - kfree(trans); - return err; } static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b127826b5d..ba9acdb055 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -176,7 +176,7 @@ static void nft_bitmap_activate(const struct net *net, nft_set_elem_change_active(net, set, &be->ext); } -static bool nft_bitmap_flush(const struct net *net, +static void nft_bitmap_flush(const struct net *net, const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); @@ -188,8 +188,6 @@ static bool nft_bitmap_flush(const struct net *net, /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - - return true; } static void *nft_bitmap_deactivate(const struct net *net, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index dd915a16a3..24467ab02e 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -194,14 +194,12 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_rhash_flush(const struct net *net, +static void nft_rhash_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_rhash_elem *he = priv; nft_set_elem_change_active(net, set, &he->ext); - - return true; } static void *nft_rhash_deactivate(const struct net *net, @@ -567,13 +565,12 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_hash_flush(const struct net *net, +static void nft_hash_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_hash_elem *he = priv; nft_set_elem_change_active(net, set, &he->ext); - return true; } static void *nft_hash_deactivate(const struct net *net, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 0c4983ad96..63263bf948 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1706,13 +1706,12 @@ static void *nft_pipapo_deactivate(const struct net *net, * * Return: true if element was found and deactivated. */ -static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, +static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, void *elem) { struct nft_pipapo_elem *e = elem; - return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext), - &e->ext); + nft_set_elem_change_active(net, set, &e->ext); } /** diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3d1e8f0d7a..a2d0d18630 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -384,14 +384,12 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_flush(const struct net *net, +static void nft_rbtree_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_rbtree_elem *rbe = priv; nft_set_elem_change_active(net, set, &rbe->ext); - - return true; } static void *nft_rbtree_deactivate(const struct net *net, From 24e23550003032c5c93d13652f5e69eea2228cc6 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 4 Nov 2024 11:15:17 -0800 Subject: [PATCH 36/49] netfilter: nf_tables: bail out on mismatching dynset and set expressions jira VULN-683 cve CVE-2023-6622 commit-author Pablo Neira Ayuso commit 3701cd390fd731ee7ae8b8006246c8db82c72bea If dynset expressions provided by userspace is larger than the declared set expressions, then bail out. Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions") Reported-by: Xingyuan Mo Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 3701cd390fd731ee7ae8b8006246c8db82c72bea) Signed-off-by: Greg Rose --- net/netfilter/nft_dynset.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index f407b3e9c1..ae61505d4c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -274,10 +274,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->expr_array[i] = dynset_expr; priv->num_exprs++; - if (set->num_exprs && - dynset_expr->ops != set->exprs[i]->ops) { - err = -EOPNOTSUPP; - goto err_expr_free; + if (set->num_exprs) { + if (i >= set->num_exprs) { + err = -EINVAL; + goto err_expr_free; + } + if (dynset_expr->ops != set->exprs[i]->ops) { + err = -EOPNOTSUPP; + goto err_expr_free; + } } i++; } From 224cc991d2120a5f86abf71808a363a6145bc66f Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 4 Nov 2024 11:39:41 -0800 Subject: [PATCH 37/49] netfilter: nf_tables: disallow anonymous set with timeout flag jira VULN-827 cve CVE-2024-26642 commit-author Pablo Neira Ayuso commit 16603605b667b70da974bea8216c93e7db043bf1 Anonymous sets are never used with timeout from userspace, reject this. Exception to this rule is NFT_SET_EVAL to ensure legacy meters still work. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 16603605b667b70da974bea8216c93e7db043bf1) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d9346fab34..3a49209969 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4152,6 +4152,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } dtype = 0; From c80ea0322915ec78b9d3dceb2a615081e6da6d05 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 5 Nov 2024 14:11:55 -0800 Subject: [PATCH 38/49] netfilter: nf_tables: use timestamp to check for set element timeout jira VULN-7047 cve CVE-2024-27397 commit-author Pablo Neira Ayuso commit 7395dfacfff65e9938ac0889dafa1ab01e987d15 upstream-diff Significant code drift, fuzz, conflicts and every other dumb thing cherry-pick can do while pulling something new into something ancient. Tried to stay true to the resf_kernel-4.18.0-553.8.1.el8_10 tagged code. Add a timestamp field at the beginning of the transaction, store it in the nftables per-netns area. Update set backend .insert, .deactivate and sync gc path to use the timestamp, this avoids that an element expires while control plane transaction is still unfinished. .lookup and .update, which are used from packet path, still use the current time to check if the element has expired. And .get path and dump also since this runs lockless under rcu read size lock. Then, there is async gc which also needs to check the current time since it runs asynchronously from a workqueue. Fixes: c3e1b005ed1c ("netfilter: nf_tables: add set element timeout support") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 7395dfacfff65e9938ac0889dafa1ab01e987d15) Signed-off-by: Greg Rose Conflicts: include/net/netfilter/nf_tables.h net/netfilter/nf_tables_api.c net/netfilter/nft_set_pipapo.c net/netfilter/nft_set_rbtree.c --- include/net/netfilter/nf_tables.h | 16 ++++++++++++++-- net/netfilter/nf_tables_api.c | 2 ++ net/netfilter/nft_set_hash.c | 8 +++++++- net/netfilter/nft_set_pipapo.c | 18 +++++++++++------- net/netfilter/nft_set_rbtree.c | 21 +++++++++++++-------- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6f5b0acc79..fcad01f4c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -716,10 +716,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); } -static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, + u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return __nft_set_elem_expired(ext, get_jiffies_64()); } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, @@ -1534,6 +1540,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re #endif struct nftables_pernet { + u64 tstamp; unsigned int gc_seq; }; @@ -1544,4 +1551,9 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +static inline u64 nft_net_tstamp(const struct net *net) +{ + return nft_pernet(net)->tstamp; +} + #endif /* _NET_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3a49209969..ddccae1d0b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8643,9 +8643,11 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = nft_pernet(net); bool genid_ok; mutex_lock(&net->nft_commit_mutex); + nft_net->tstamp = get_jiffies_64(); genid_ok = genid == 0 || net->nft.base_seq == genid; if (!genid_ok) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 24467ab02e..886ff22420 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -38,6 +38,7 @@ struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; + u64 tstamp; }; static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) @@ -64,7 +65,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, return 1; if (nft_set_elem_is_dead(&he->ext)) return 1; - if (nft_set_elem_expired(&he->ext)) + if (__nft_set_elem_expired(&he->ext, x->tstamp)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) return 1; @@ -88,6 +89,7 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = key, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -106,6 +108,7 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = elem->key.val.data, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -129,6 +132,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, .genmask = NFT_GENMASK_ANY, .set = set, .key = key, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -172,6 +176,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; struct nft_rhash_elem *prev; @@ -212,6 +217,7 @@ static void *nft_rhash_deactivate(const struct net *net, .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; rcu_read_lock(); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 63263bf948..3f3ce6b907 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -635,6 +635,7 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, * @set: nftables API set representation * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask + * @tstamp: timestamp to check for expired elements * * This is essentially the same as the lookup function, except that it matches * key data against the uncommitted copy and doesn't use preallocated maps for @@ -644,7 +645,8 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, */ static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, - const u8 *data, u8 genmask) + const u8 *data, u8 genmask, + u64 tstamp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); @@ -704,7 +706,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext)) + if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp)) goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) @@ -741,7 +743,7 @@ void *nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + nft_genmask_cur(net), get_jiffies_64()); } /** @@ -1100,14 +1102,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); struct nft_pipapo_field *f; int i, bsize_max, err = 0; - dup = pipapo_get(net, set, start, genmask); + dup = pipapo_get(net, set, start, genmask, tstamp); if (PTR_ERR(dup) == -ENOENT) { if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) { end = (const u8 *)nft_set_ext_key_end(ext)->data; - dup = pipapo_get(net, set, end, nft_genmask_next(net)); + dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); } else { end = start; } @@ -1457,6 +1460,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); + u64 tstamp = nft_net_tstamp(net); int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; struct nft_trans_gc *gc; @@ -1491,7 +1495,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) /* synchronous gc never fails, there is no need to set on * NFT_SET_ELEM_DEAD_BIT. */ - if (nft_set_elem_expired(&e->ext)) { + if (__nft_set_elem_expired(&e->ext, tstamp)) { priv->dirty = true; gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); @@ -1662,7 +1666,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net)); + e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); if (IS_ERR(e)) return NULL; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index a2d0d18630..51180c1e45 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -229,6 +229,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, { struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; bool overlap = false; @@ -287,13 +288,13 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, if (nft_rbtree_interval_start(new)) { if (nft_rbtree_interval_end(rbe) && nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext) && !*p) + !__nft_set_elem_expired(&rbe->ext, tstamp) && !*p) overlap = false; } else { overlap = nft_rbtree_interval_end(rbe) && nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext); + !__nft_set_elem_expired(&rbe->ext, tstamp); } } else if (d > 0) { p = &parent->rb_right; @@ -302,9 +303,9 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, overlap = nft_rbtree_interval_end(rbe) && nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext); + !__nft_set_elem_expired(&rbe->ext, tstamp); } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { + !__nft_set_elem_expired(&rbe->ext, tstamp)) { overlap = nft_rbtree_interval_end(rbe); } } else { @@ -313,17 +314,17 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, p = &parent->rb_left; if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) + !__nft_set_elem_expired(&rbe->ext, tstamp)) overlap = false; } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(new)) { p = &parent->rb_right; if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) + !__nft_set_elem_expired(&rbe->ext, tstamp)) overlap = false; } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { + !__nft_set_elem_expired(&rbe->ext, tstamp)) { *ext = &rbe->ext; return -EEXIST; } else { @@ -400,6 +401,7 @@ static void *nft_rbtree_deactivate(const struct net *net, const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d; while (parent != NULL) { @@ -420,6 +422,8 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { + break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; @@ -472,6 +476,7 @@ static void nft_rbtree_gc(struct work_struct *work) struct nft_set *set; unsigned int gc_seq; struct net *net; + u64 tstamp; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); @@ -510,7 +515,7 @@ static void nft_rbtree_gc(struct work_struct *work) rbe_end = rbe; continue; } - if (!nft_set_elem_expired(&rbe->ext)) + if (!__nft_set_elem_expired(&rbe->ext, tstamp)) continue; nft_set_elem_dead(&rbe->ext); From e37bce7b437a3e55da9819570775fa7b933eed43 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 08:50:25 -0800 Subject: [PATCH 39/49] netfilter: nf_tables: honor table dormant flag from netdev release event path jira VULN-5238 cve CVE-2024-36005 commit-author Pablo Neira Ayuso commit 8e30abc9ace4f0add4cd761dfdbfaebae5632dd2 Check for table dormant flag otherwise netdev release event path tries to unregister an already unregistered hook. [524854.857999] ------------[ cut here ]------------ [524854.858010] WARNING: CPU: 0 PID: 3386599 at net/netfilter/core.c:501 __nf_unregister_net_hook+0x21a/0x260 [...] [524854.858848] CPU: 0 PID: 3386599 Comm: kworker/u32:2 Not tainted 6.9.0-rc3+ #365 [524854.858869] Workqueue: netns cleanup_net [524854.858886] RIP: 0010:__nf_unregister_net_hook+0x21a/0x260 [524854.858903] Code: 24 e8 aa 73 83 ff 48 63 43 1c 83 f8 01 0f 85 3d ff ff ff e8 98 d1 f0 ff 48 8b 3c 24 e8 8f 73 83 ff 48 63 43 1c e9 26 ff ff ff <0f> 0b 48 83 c4 18 48 c7 c7 00 68 e9 82 5b 5d 41 5c 41 5d 41 5e 41 [524854.858914] RSP: 0018:ffff8881e36d79e0 EFLAGS: 00010246 [524854.858926] RAX: 0000000000000000 RBX: ffff8881339ae790 RCX: ffffffff81ba524a [524854.858936] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff8881c8a16438 [524854.858945] RBP: ffff8881c8a16438 R08: 0000000000000001 R09: ffffed103c6daf34 [524854.858954] R10: ffff8881e36d79a7 R11: 0000000000000000 R12: 0000000000000005 [524854.858962] R13: ffff8881c8a16000 R14: 0000000000000000 R15: ffff8881351b5a00 [524854.858971] FS: 0000000000000000(0000) GS:ffff888390800000(0000) knlGS:0000000000000000 [524854.858982] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [524854.858991] CR2: 00007fc9be0f16f4 CR3: 00000001437cc004 CR4: 00000000001706f0 [524854.859000] Call Trace: [524854.859006] [524854.859013] ? __warn+0x9f/0x1a0 [524854.859027] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859044] ? report_bug+0x1b1/0x1e0 [524854.859060] ? handle_bug+0x3c/0x70 [524854.859071] ? exc_invalid_op+0x17/0x40 [524854.859083] ? asm_exc_invalid_op+0x1a/0x20 [524854.859100] ? __nf_unregister_net_hook+0x6a/0x260 [524854.859116] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859135] nf_tables_netdev_event+0x337/0x390 [nf_tables] [524854.859304] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859461] ? packet_notifier+0xb3/0x360 [524854.859476] ? _raw_spin_unlock_irqrestore+0x11/0x40 [524854.859489] ? dcbnl_netdevice_event+0x35/0x140 [524854.859507] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859661] notifier_call_chain+0x7d/0x140 [524854.859677] unregister_netdevice_many_notify+0x5e1/0xae0 Fixes: d54725cd11a5 ("netfilter: nf_tables: support for multiple devices per netdev hook") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 8e30abc9ace4f0add4cd761dfdbfaebae5632dd2) Signed-off-by: Greg Rose --- net/netfilter/nft_chain_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 290b4b65e5..512836e887 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -303,7 +303,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; if (n > 1) { - nf_unregister_net_hook(ctx->net, &found->ops); + if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(ctx->net, &found->ops); + list_del_rcu(&found->list); kfree_rcu(found, rcu); return; From 032cc7dee8b411e6ec72d471d43353ede716a2b2 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 10:45:34 -0800 Subject: [PATCH 40/49] netfilter: nf_tables: release batch on table validation from abort path jira VULN-4969 subsystem-sync netfilter:nf_tables 4.18.0-553.16.1 commit-author Pablo Neira Ayuso commit a45e6889575c2067d3c0212b6bc1022891e65b91 Unlike early commit path stage which triggers a call to abort, an explicit release of the batch is required on abort, otherwise mutex is released and commit_list remains in place. Add WARN_ON_ONCE to ensure commit_list is empty from the abort path before releasing the mutex. After this patch, commit_list is always assumed to be empty before grabbing the mutex, therefore 03c1f1ef1584 ("netfilter: Cleanup nft_net->module_list from nf_tables_exit_net()") only needs to release the pending modules for registration. Cc: stable@vger.kernel.org Fixes: c0391b6ab810 ("netfilter: nf_tables: missing validation from the abort path") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit a45e6889575c2067d3c0212b6bc1022891e65b91) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ddccae1d0b..7d5ff6d268 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8465,10 +8465,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { @@ -8617,7 +8618,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) else nf_tables_module_autoload_cleanup(net); - return 0; + return err; } static void nf_tables_cleanup(struct net *net) @@ -8636,6 +8637,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + mutex_unlock(&net->nft_commit_mutex); return ret; @@ -9287,9 +9290,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&net->nft.commit_list) || - !list_empty(&net->nft_module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + + if (!list_empty(&net->nft_module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); From c792aca5c54e4bd6e765c1145b4e13ba8a4aa8ac Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 11:03:10 -0800 Subject: [PATCH 41/49] netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path jira VULN-4905 cve CVE-2024-26925 commit-author Pablo Neira Ayuso commit 0d459e2ffb541841714839e8228b845458ed3b27 The commit mutex should not be released during the critical section between nft_gc_seq_begin() and nft_gc_seq_end(), otherwise, async GC worker could collect expired objects and get the released commit lock within the same GC sequence. nf_tables_module_autoload() temporarily releases the mutex to load module dependencies, then it goes back to replay the transaction again. Move it at the end of the abort phase after nft_gc_seq_end() is called. Cc: stable@vger.kernel.org Fixes: 720344340fb9 ("netfilter: nf_tables: GC transaction race with abort path") Reported-by: Kuan-Ting Chen Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 0d459e2ffb541841714839e8228b845458ed3b27) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d5ff6d268..35bfe3da32 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8613,11 +8613,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - return err; } @@ -8639,6 +8634,14 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&net->nft_commit_mutex); return ret; From 2e6ba1c4431b230661240ec6729b5c6cec38d6ff Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 11:07:19 -0800 Subject: [PATCH 42/49] netfilter: nf_tables: __nft_expr_type_get() selects specific family type jira VULN-4969 subsystem-sync netfilter:nf_tables 4.18.0-553.16.1 commit-author Pablo Neira Ayuso commit 9cff126f73a7025bcb0883189b2bed90010a57d4 In case that there are two types, prefer the family specify extension. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 9cff126f73a7025bcb0883189b2bed90010a57d4) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 35bfe3da32..ad349a3d9a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2460,14 +2460,17 @@ EXPORT_SYMBOL_GPL(nft_unregister_expr); static const struct nft_expr_type *__nft_expr_type_get(u8 family, struct nlattr *nla) { - const struct nft_expr_type *type; + const struct nft_expr_type *type, *candidate = NULL; list_for_each_entry(type, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, type->name) && - (!type->family || type->family == family)) - return type; + if (!nla_strcmp(nla, type->name)) { + if (!type->family && !candidate) + candidate = type; + else if (type->family == family) + candidate = type; + } } - return NULL; + return candidate; } #ifdef CONFIG_MODULES From 3d347ad3c99429bbbe9951568df34ff0713db474 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 12:54:52 -0800 Subject: [PATCH 43/49] netfilter: nf_tables: Fix potential data-race in __nft_expr_type_get() jira VULN-4969 cve CVE-2024-27020 commit-author Ziyang Xuan commit f969eb84ce482331a991079ab7a5c4dc3b7f89bf nft_unregister_expr() can concurrent with __nft_expr_type_get(), and there is not any protection when iterate over nf_tables_expressions list in __nft_expr_type_get(). Therefore, there is potential data-race of nf_tables_expressions list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_expressions list in __nft_expr_type_get(), and use rcu_read_lock() in the caller nft_expr_type_get() to protect the entire type query process. Fixes: ef1f7df9170d ("netfilter: nf_tables: expression ops overloading") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso (cherry picked from commit f969eb84ce482331a991079ab7a5c4dc3b7f89bf) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ad349a3d9a..9d1c5f0e25 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2462,7 +2462,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -2494,9 +2494,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES From 004ecc1e56ddf46e31b9fd501db20c53f9ecb6cf Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 13:13:26 -0800 Subject: [PATCH 44/49] netfilter: nf_tables: Fix potential data-race in __nft_obj_type_get() jira VULN-4961 cve CVE-2024-27019 commit-author Ziyang Xuan commit d78d867dcea69c328db30df665be5be7d0148484 upstream-diff The cherry-pick tried to pull in extra cruft not part of the upstream patch. I have resolved the conflicts in favor of the 4.18.0-553.16.1 tagged code. nft_unregister_obj() can concurrent with __nft_obj_type_get(), and there is not any protection when iterate over nf_tables_objects list in __nft_obj_type_get(). Therefore, there is potential data-race of nf_tables_objects list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_objects list in __nft_obj_type_get(), and use rcu_read_lock() in the caller nft_obj_type_get() to protect the entire type query process. Fixes: e50092404c1b ("netfilter: nf_tables: add stateful objects") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso (cherry picked from commit d78d867dcea69c328db30df665be5be7d0148484) Signed-off-by: Greg Rose Conflicts: net/netfilter/nf_tables_api.c --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9d1c5f0e25..b97cc85465 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6137,7 +6137,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) { const struct nft_object_type *type; - list_for_each_entry(type, &nf_tables_objects, list) { + list_for_each_entry_rcu(type, &nf_tables_objects, list) { if (objtype == type->type) return type; } @@ -6149,9 +6149,13 @@ nft_obj_type_get(struct net *net, u32 objtype) { const struct nft_object_type *type; + rcu_read_lock(); type = __nft_obj_type_get(objtype); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES From e788218bfb05be6126240c8e31cf4322e466302e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 13:45:30 -0800 Subject: [PATCH 45/49] netfilter: nf_tables: do not compare internal table flags on updates jira VULN-4985 cve CVE-2024-27065 commit-author Pablo Neira Ayuso commit 4a0e7f2decbf9bd72461226f1f5f7dcc4b08f139 Restore skipping transaction if table update does not modify flags. Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 4a0e7f2decbf9bd72461226f1f5f7dcc4b08f139) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b97cc85465..d615840ddc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -967,7 +967,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_DORMANT) return -EINVAL; - if (flags == ctx->table->flags) + if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) return 0; /* No dormant off/on/off/on games in single transaction */ From f8d9f6b156a4858967ee7add1f2fd36c197632ed Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 13:46:00 -0800 Subject: [PATCH 46/49] netfilter: nf_tables: flush pending destroy work before exit_net release jira VULN-5126 cve CVE-2024-35899 commit-author Pablo Neira Ayuso commit 24cea9677025e0de419989ecb692acd4bb34cac2 Similar to 2c9f0293280e ("netfilter: nf_tables: flush pending destroy work before netlink notifier") to address a race between exit_net and the destroy workqueue. The trace below shows an element to be released via destroy workqueue while exit_net path (triggered via module removal) has already released the set that is used in such transaction. [ 1360.547789] BUG: KASAN: slab-use-after-free in nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.547861] Read of size 8 at addr ffff888140500cc0 by task kworker/4:1/152465 [ 1360.547870] CPU: 4 PID: 152465 Comm: kworker/4:1 Not tainted 6.8.0+ #359 [ 1360.547882] Workqueue: events nf_tables_trans_destroy_work [nf_tables] [ 1360.547984] Call Trace: [ 1360.547991] [ 1360.547998] dump_stack_lvl+0x53/0x70 [ 1360.548014] print_report+0xc4/0x610 [ 1360.548026] ? __virt_addr_valid+0xba/0x160 [ 1360.548040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 1360.548054] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548176] kasan_report+0xae/0xe0 [ 1360.548189] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548312] nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548447] ? __pfx_nf_tables_trans_destroy_work+0x10/0x10 [nf_tables] [ 1360.548577] ? _raw_spin_unlock_irq+0x18/0x30 [ 1360.548591] process_one_work+0x2f1/0x670 [ 1360.548610] worker_thread+0x4d3/0x760 [ 1360.548627] ? __pfx_worker_thread+0x10/0x10 [ 1360.548640] kthread+0x16b/0x1b0 [ 1360.548653] ? __pfx_kthread+0x10/0x10 [ 1360.548665] ret_from_fork+0x2f/0x50 [ 1360.548679] ? __pfx_kthread+0x10/0x10 [ 1360.548690] ret_from_fork_asm+0x1a/0x30 [ 1360.548707] [ 1360.548719] Allocated by task 192061: [ 1360.548726] kasan_save_stack+0x20/0x40 [ 1360.548739] kasan_save_track+0x14/0x30 [ 1360.548750] __kasan_kmalloc+0x8f/0xa0 [ 1360.548760] __kmalloc_node+0x1f1/0x450 [ 1360.548771] nf_tables_newset+0x10c7/0x1b50 [nf_tables] [ 1360.548883] nfnetlink_rcv_batch+0xbc4/0xdc0 [nfnetlink] [ 1360.548909] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] [ 1360.548927] netlink_unicast+0x367/0x4f0 [ 1360.548935] netlink_sendmsg+0x34b/0x610 [ 1360.548944] ____sys_sendmsg+0x4d4/0x510 [ 1360.548953] ___sys_sendmsg+0xc9/0x120 [ 1360.548961] __sys_sendmsg+0xbe/0x140 [ 1360.548971] do_syscall_64+0x55/0x120 [ 1360.548982] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 1360.548994] Freed by task 192222: [ 1360.548999] kasan_save_stack+0x20/0x40 [ 1360.549009] kasan_save_track+0x14/0x30 [ 1360.549019] kasan_save_free_info+0x3b/0x60 [ 1360.549028] poison_slab_object+0x100/0x180 [ 1360.549036] __kasan_slab_free+0x14/0x30 [ 1360.549042] kfree+0xb6/0x260 [ 1360.549049] __nft_release_table+0x473/0x6a0 [nf_tables] [ 1360.549131] nf_tables_exit_net+0x170/0x240 [nf_tables] [ 1360.549221] ops_exit_list+0x50/0xa0 [ 1360.549229] free_exit_list+0x101/0x140 [ 1360.549236] unregister_pernet_operations+0x107/0x160 [ 1360.549245] unregister_pernet_subsys+0x1c/0x30 [ 1360.549254] nf_tables_module_exit+0x43/0x80 [nf_tables] [ 1360.549345] __do_sys_delete_module+0x253/0x370 [ 1360.549352] do_syscall_64+0x55/0x120 [ 1360.549360] entry_SYSCALL_64_after_hwframe+0x55/0x5d (gdb) list *__nft_release_table+0x473 0x1e033 is in __nft_release_table (net/netfilter/nf_tables_api.c:11354). 11349 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { 11350 list_del(&flowtable->list); 11351 nft_use_dec(&table->use); 11352 nf_tables_flowtable_destroy(flowtable); 11353 } 11354 list_for_each_entry_safe(set, ns, &table->sets, list) { 11355 list_del(&set->list); 11356 nft_use_dec(&table->use); 11357 if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) 11358 nft_map_deactivate(&ctx, set); (gdb) [ 1360.549372] Last potentially related work creation: [ 1360.549376] kasan_save_stack+0x20/0x40 [ 1360.549384] __kasan_record_aux_stack+0x9b/0xb0 [ 1360.549392] __queue_work+0x3fb/0x780 [ 1360.549399] queue_work_on+0x4f/0x60 [ 1360.549407] nft_rhash_remove+0x33b/0x340 [nf_tables] [ 1360.549516] nf_tables_commit+0x1c6a/0x2620 [nf_tables] [ 1360.549625] nfnetlink_rcv_batch+0x728/0xdc0 [nfnetlink] [ 1360.549647] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] [ 1360.549671] netlink_unicast+0x367/0x4f0 [ 1360.549680] netlink_sendmsg+0x34b/0x610 [ 1360.549690] ____sys_sendmsg+0x4d4/0x510 [ 1360.549697] ___sys_sendmsg+0xc9/0x120 [ 1360.549706] __sys_sendmsg+0xbe/0x140 [ 1360.549715] do_syscall_64+0x55/0x120 [ 1360.549725] entry_SYSCALL_64_after_hwframe+0x55/0x5d Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 24cea9677025e0de419989ecb692acd4bb34cac2) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d615840ddc..a50fa4c308 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9392,6 +9392,7 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); From 3bd9bb39b85c72bfe125096c81bf1e6e95fbf30c Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 8 Nov 2024 14:15:22 -0800 Subject: [PATCH 47/49] netfilter: nf_tables: reject new basechain after table flag update jira VULN-5134 cve CVE-2024-35900 commit-author Pablo Neira Ayuso commit 994209ddf4f430946f6247616b2e33d179243769 upstream-diff Fixed up a couple of small conflicts introduced by cherry picking from a very new source back into an ancient source. When dormant flag is toggled, hooks are disabled in the commit phase by iterating over current chains in table (existing and new). The following configuration allows for an inconsistent state: add table x add chain x y { type filter hook input priority 0; } add table x { flags dormant; } add chain x w { type filter hook input priority 1; } which triggers the following warning when trying to unregister chain w which is already unregistered. [ 127.322252] WARNING: CPU: 7 PID: 1211 at net/netfilter/core.c:50 1 __nf_unregister_net_hook+0x21a/0x260 [...] [ 127.322519] Call Trace: [ 127.322521] [ 127.322524] ? __warn+0x9f/0x1a0 [ 127.322531] ? __nf_unregister_net_hook+0x21a/0x260 [ 127.322537] ? report_bug+0x1b1/0x1e0 [ 127.322545] ? handle_bug+0x3c/0x70 [ 127.322552] ? exc_invalid_op+0x17/0x40 [ 127.322556] ? asm_exc_invalid_op+0x1a/0x20 [ 127.322563] ? kasan_save_free_info+0x3b/0x60 [ 127.322570] ? __nf_unregister_net_hook+0x6a/0x260 [ 127.322577] ? __nf_unregister_net_hook+0x21a/0x260 [ 127.322583] ? __nf_unregister_net_hook+0x6a/0x260 [ 127.322590] ? __nf_tables_unregister_hook+0x8a/0xe0 [nf_tables] [ 127.322655] nft_table_disable+0x75/0xf0 [nf_tables] [ 127.322717] nf_tables_commit+0x2571/0x2620 [nf_tables] Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 994209ddf4f430946f6247616b2e33d179243769) Signed-off-by: Greg Rose Conflicts: net/netfilter/nf_tables_api.c --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a50fa4c308..54d6b1709b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2040,6 +2040,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + err = nft_chain_parse_hook(net, nla, &hook, family, true); if (err < 0) return err; From 3d7f9ca97498ae46ebe16ff778c09be843545b4b Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 13 Nov 2024 16:56:36 -0800 Subject: [PATCH 48/49] netfilter: nf_tables: reject table flag and netdev basechain updates jira VULN-5118 subsystem-sync netfilter:nf_tables 4.18.0-553.16.1 commit-author Pablo Neira Ayuso commit 1e1fb6f00f52812277963365d9bd835b9b0ea4e0 upstream-diff The upstream diff brings in cruft we don't want, and will be superseded by the next commit but is here to maintain history. netdev basechain updates are stored in the transaction object hook list. When setting on the table dormant flag, it iterates over the existing hooks in the basechain. Thus, skipping the hooks that are being added/deleted in this transaction, which leaves hook registration in inconsistent state. Reject table flag updates in combination with netdev basechain updates in the same batch: - Update table flags and add/delete basechain: Check from basechain update path if there are pending flag updates for this table. - add/delete basechain and update table flags: Iterate over the transaction list to search for basechain updates from the table update path. In both cases, the batch is rejected. Based on suggestion from Florian Westphal. Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Fixes: 7d937b107108f ("netfilter: nf_tables: support for deleting devices in an existing netdev chain") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 1e1fb6f00f52812277963365d9bd835b9b0ea4e0) Signed-off-by: Greg Rose Conflicts: net/netfilter/nf_tables_api.c --- net/netfilter/nf_tables_api.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 54d6b1709b..8f2b208022 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -954,6 +954,24 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ __NFT_TABLE_F_WAS_AWAKEN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &ctx->net->nft.commit_list, list) { + if ((trans->msg_type == NFT_MSG_NEWCHAIN || + trans->msg_type == NFT_MSG_DELCHAIN) && + trans->ctx.table == ctx->table && + nft_trans_chain_update(trans)) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -971,7 +989,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; /* No dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, From 7100e3085505cdbbef9ceabdc92c1a6f8fe7d7a2 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 13 Nov 2024 17:01:17 -0800 Subject: [PATCH 49/49] netfilter: nf_tables: discard table flag update with pending basechain deletion jira VULN-5118 cve CVE-2024-35897 commit-author Pablo Neira Ayuso commit 1bc83a019bbe268be3526406245ec28c2458a518 Hook unregistration is deferred to the commit phase, same occurs with hook updates triggered by the table dormant flag. When both commands are combined, this results in deleting a basechain while leaving its hook still registered in the core. Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 1bc83a019bbe268be3526406245ec28c2458a518) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8f2b208022..23fda0b177 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -962,10 +962,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) return true; list_for_each_entry(trans, &ctx->net->nft.commit_list, list) { - if ((trans->msg_type == NFT_MSG_NEWCHAIN || - trans->msg_type == NFT_MSG_DELCHAIN) && - trans->ctx.table == ctx->table && - nft_trans_chain_update(trans)) + if (trans->ctx.table == ctx->table && + ((trans->msg_type == NFT_MSG_NEWCHAIN && + nft_trans_chain_update(trans)) || + (trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)))) return true; }