Skip to content

Commit 2bd624b

Browse files
anoobsdavem330
authored andcommitted
packet: Do not call fanout_release from atomic contexts
Commit 6664498 ("packet: call fanout_release, while UNREGISTERING a netdev"), unfortunately, introduced the following issues. 1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside rcu_read-side critical section. rcu_read_lock disables preemption, most often, which prohibits calling sleeping functions. [ ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section! [ ] [ ] rcu_scheduler_active = 1, debug_locks = 0 [ ] 4 locks held by ovs-vswitchd/1969: [ ] #0: (cb_lock){++++++}, at: [<ffffffff8158a6c9>] genl_rcv+0x19/0x40 [ ] #1: (ovs_mutex){+.+.+.}, at: [<ffffffffa04878ca>] ovs_vport_cmd_del+0x4a/0x100 [openvswitch] [ ] #2: (rtnl_mutex){+.+.+.}, at: [<ffffffff81564157>] rtnl_lock+0x17/0x20 [ ] #3: (rcu_read_lock){......}, at: [<ffffffff81614165>] packet_notifier+0x5/0x3f0 [ ] [ ] Call Trace: [ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4 [ ] [<ffffffff810c9077>] lockdep_rcu_suspicious+0x107/0x110 [ ] [<ffffffff810a2da7>] ___might_sleep+0x57/0x210 [ ] [<ffffffff810a2fd0>] __might_sleep+0x70/0x90 [ ] [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0 [ ] [<ffffffff810de93f>] ? vprintk_default+0x1f/0x30 [ ] [<ffffffff81186e88>] ? printk+0x4d/0x4f [ ] [<ffffffff816106dd>] fanout_release+0x1d/0xe0 [ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0 2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock). "sleeping function called from invalid context" [ ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4 [ ] [<ffffffff810a2f52>] ___might_sleep+0x202/0x210 [ ] [<ffffffff810a2fd0>] __might_sleep+0x70/0x90 [ ] [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0 [ ] [<ffffffff816106dd>] fanout_release+0x1d/0xe0 [ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0 3. calling dev_remove_pack(&fanout->prot_hook), from inside spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack() -> synchronize_net(), which might sleep. [ ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002 [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4 [ ] [<ffffffff81186274>] __schedule_bug+0x64/0x73 [ ] [<ffffffff8162b8cb>] __schedule+0x6b/0xd10 [ ] [<ffffffff8162c5db>] schedule+0x6b/0x80 [ ] [<ffffffff81630b1d>] schedule_timeout+0x38d/0x410 [ ] [<ffffffff810ea3fd>] synchronize_sched_expedited+0x53d/0x810 [ ] [<ffffffff810ea6de>] synchronize_rcu_expedited+0xe/0x10 [ ] [<ffffffff8154eab5>] synchronize_net+0x35/0x50 [ ] [<ffffffff8154eae3>] dev_remove_pack+0x13/0x20 [ ] [<ffffffff8161077e>] fanout_release+0xbe/0xe0 [ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0 4. fanout_release() races with calls from different CPU. To fix the above problems, remove the call to fanout_release() under rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to __fanout_{link,unlink}. So, call to {,__}unregister_prot_hook() will make sure fanout->prot_hook is removed as well. Fixes: 6664498 ("packet: call fanout_release, while UNREGISTERING a netdev") Reported-by: Eric Dumazet <[email protected]> Signed-off-by: Anoob Soman <[email protected]> Acked-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 4695dae commit 2bd624b

File tree

1 file changed

+22
-9
lines changed

1 file changed

+22
-9
lines changed

net/packet/af_packet.c

+22-9
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po)
14971497
f->arr[f->num_members] = sk;
14981498
smp_wmb();
14991499
f->num_members++;
1500+
if (f->num_members == 1)
1501+
dev_add_pack(&f->prot_hook);
15001502
spin_unlock(&f->lock);
15011503
}
15021504

@@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
15131515
BUG_ON(i >= f->num_members);
15141516
f->arr[i] = f->arr[f->num_members - 1];
15151517
f->num_members--;
1518+
if (f->num_members == 0)
1519+
__dev_remove_pack(&f->prot_hook);
15161520
spin_unlock(&f->lock);
15171521
}
15181522

@@ -1693,7 +1697,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
16931697
match->prot_hook.func = packet_rcv_fanout;
16941698
match->prot_hook.af_packet_priv = match;
16951699
match->prot_hook.id_match = match_fanout_group;
1696-
dev_add_pack(&match->prot_hook);
16971700
list_add(&match->list, &fanout_list);
16981701
}
16991702
err = -EINVAL;
@@ -1718,7 +1721,12 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
17181721
return err;
17191722
}
17201723

1721-
static void fanout_release(struct sock *sk)
1724+
/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
1725+
* pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
1726+
* It is the responsibility of the caller to call fanout_release_data() and
1727+
* free the returned packet_fanout (after synchronize_net())
1728+
*/
1729+
static struct packet_fanout *fanout_release(struct sock *sk)
17221730
{
17231731
struct packet_sock *po = pkt_sk(sk);
17241732
struct packet_fanout *f;
@@ -1728,17 +1736,17 @@ static void fanout_release(struct sock *sk)
17281736
if (f) {
17291737
po->fanout = NULL;
17301738

1731-
if (atomic_dec_and_test(&f->sk_ref)) {
1739+
if (atomic_dec_and_test(&f->sk_ref))
17321740
list_del(&f->list);
1733-
dev_remove_pack(&f->prot_hook);
1734-
fanout_release_data(f);
1735-
kfree(f);
1736-
}
1741+
else
1742+
f = NULL;
17371743

17381744
if (po->rollover)
17391745
kfree_rcu(po->rollover, rcu);
17401746
}
17411747
mutex_unlock(&fanout_mutex);
1748+
1749+
return f;
17421750
}
17431751

17441752
static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
@@ -2912,6 +2920,7 @@ static int packet_release(struct socket *sock)
29122920
{
29132921
struct sock *sk = sock->sk;
29142922
struct packet_sock *po;
2923+
struct packet_fanout *f;
29152924
struct net *net;
29162925
union tpacket_req_u req_u;
29172926

@@ -2951,9 +2960,14 @@ static int packet_release(struct socket *sock)
29512960
packet_set_ring(sk, &req_u, 1, 1);
29522961
}
29532962

2954-
fanout_release(sk);
2963+
f = fanout_release(sk);
29552964

29562965
synchronize_net();
2966+
2967+
if (f) {
2968+
fanout_release_data(f);
2969+
kfree(f);
2970+
}
29572971
/*
29582972
* Now the socket is dead. No more input will appear.
29592973
*/
@@ -3905,7 +3919,6 @@ static int packet_notifier(struct notifier_block *this,
39053919
}
39063920
if (msg == NETDEV_UNREGISTER) {
39073921
packet_cached_dev_reset(po);
3908-
fanout_release(sk);
39093922
po->ifindex = -1;
39103923
if (po->prot_hook.dev)
39113924
dev_put(po->prot_hook.dev);

0 commit comments

Comments
 (0)