Skip to content

Commit

Permalink
Merge branch 'net-data-races'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
net: annotate data-races

This series was inspired by a syzbot/KCSAN report.

This will later also permit some optimizations,
like not having to lock the socket while reading/writing
some of its fields.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Jul 29, 2023
2 parents 7938cd1 + 8bf43be commit 37e3cec
Show file tree
Hide file tree
Showing 27 changed files with 94 additions and 80 deletions.
7 changes: 4 additions & 3 deletions include/net/inet_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)

static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
if (!sk->sk_mark &&
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
u32 mark = READ_ONCE(sk->sk_mark);

if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;

return sk->sk_mark;
return mark;
}

static inline int inet_request_bound_dev_if(const struct sock *sk,
Expand Down
2 changes: 1 addition & 1 deletion include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
{
ipcm_init(ipcm);

ipcm->sockc.mark = inet->sk.sk_mark;
ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
Expand Down
4 changes: 2 additions & 2 deletions include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
{
flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
Expand Down Expand Up @@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;

flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
src, dport, sport, sk->sk_uid);
}
Expand Down
2 changes: 1 addition & 1 deletion net/can/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)

skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;

skb_setup_tx_timestamp(skb, sockc.tsflags);
Expand Down
69 changes: 40 additions & 29 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
{
struct __kernel_sock_timeval tv;
int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
long val;

if (err)
return err;
Expand All @@ -439,19 +440,20 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (tv.tv_sec < 0) {
static int warned __read_mostly;

*timeo_p = 0;
WRITE_ONCE(*timeo_p, 0);
if (warned < 10 && net_ratelimit()) {
warned++;
pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
__func__, current->comm, task_pid_nr(current));
}
return 0;
}
*timeo_p = MAX_SCHEDULE_TIMEOUT;
if (tv.tv_sec == 0 && tv.tv_usec == 0)
return 0;
if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
val = MAX_SCHEDULE_TIMEOUT;
if ((tv.tv_sec || tv.tv_usec) &&
(tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
USEC_PER_SEC / HZ);
WRITE_ONCE(*timeo_p, val);
return 0;
}

Expand Down Expand Up @@ -804,7 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority)
{
lock_sock(sk);
sk->sk_priority = priority;
WRITE_ONCE(sk->sk_priority, priority);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
Expand All @@ -813,9 +815,9 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
lock_sock(sk);
if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
sk->sk_sndtimeo = secs * HZ;
WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
else
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
Expand Down Expand Up @@ -988,7 +990,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
static void __sock_set_mark(struct sock *sk, u32 val)
{
if (val != sk->sk_mark) {
sk->sk_mark = val;
WRITE_ONCE(sk->sk_mark, val);
sk_dst_reset(sk);
}
}
Expand All @@ -1007,7 +1009,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
bytes = round_down(bytes, PAGE_SIZE);

WARN_ON(bytes > sk->sk_reserved_mem);
sk->sk_reserved_mem -= bytes;
WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
sk_mem_reclaim(sk);
}

Expand Down Expand Up @@ -1044,7 +1046,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
}
sk->sk_forward_alloc += pages << PAGE_SHIFT;

sk->sk_reserved_mem += pages << PAGE_SHIFT;
WRITE_ONCE(sk->sk_reserved_mem,
sk->sk_reserved_mem + (pages << PAGE_SHIFT));

return 0;
}
Expand Down Expand Up @@ -1213,7 +1216,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
WRITE_ONCE(sk->sk_priority, val);
else
ret = -EPERM;
break;
Expand Down Expand Up @@ -1438,7 +1441,8 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
sk->sk_max_pacing_rate = ulval;
/* Pairs with READ_ONCE() from sk_getsockopt() */
WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
break;
}
Expand Down Expand Up @@ -1533,7 +1537,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
}
if ((u8)val == SOCK_TXREHASH_DEFAULT)
val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
/* Paired with READ_ONCE() in tcp_rtx_synack() */
/* Paired with READ_ONCE() in tcp_rtx_synack()
* and sk_getsockopt().
*/
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;

Expand Down Expand Up @@ -1633,11 +1639,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_SNDBUF:
v.val = sk->sk_sndbuf;
v.val = READ_ONCE(sk->sk_sndbuf);
break;

case SO_RCVBUF:
v.val = sk->sk_rcvbuf;
v.val = READ_ONCE(sk->sk_rcvbuf);
break;

case SO_REUSEADDR:
Expand Down Expand Up @@ -1679,7 +1685,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_PRIORITY:
v.val = sk->sk_priority;
v.val = READ_ONCE(sk->sk_priority);
break;

case SO_LINGER:
Expand Down Expand Up @@ -1717,16 +1723,18 @@ int sk_getsockopt(struct sock *sk, int level, int optname,

case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
SO_RCVTIMEO_OLD == optname);
break;

case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
SO_SNDTIMEO_OLD == optname);
break;

case SO_RCVLOWAT:
v.val = sk->sk_rcvlowat;
v.val = READ_ONCE(sk->sk_rcvlowat);
break;

case SO_SNDLOWAT:
Expand Down Expand Up @@ -1843,7 +1851,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
optval, optlen, len);

case SO_MARK:
v.val = sk->sk_mark;
v.val = READ_ONCE(sk->sk_mark);
break;

case SO_RCVMARK:
Expand All @@ -1862,7 +1870,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
if (!sock->ops->set_peek_off)
return -EOPNOTSUPP;

v.val = sk->sk_peek_off;
v.val = READ_ONCE(sk->sk_peek_off);
break;
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
Expand Down Expand Up @@ -1892,20 +1900,22 @@ int sk_getsockopt(struct sock *sk, int level, int optname,

#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
v.val = sk->sk_ll_usec;
v.val = READ_ONCE(sk->sk_ll_usec);
break;
case SO_PREFER_BUSY_POLL:
v.val = READ_ONCE(sk->sk_prefer_busy_poll);
break;
#endif

case SO_MAX_PACING_RATE:
/* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
lv = sizeof(v.ulval);
v.ulval = sk->sk_max_pacing_rate;
v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
} else {
/* 32bit version */
v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
v.val = min_t(unsigned long, ~0U,
READ_ONCE(sk->sk_max_pacing_rate));
}
break;

Expand Down Expand Up @@ -1973,11 +1983,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_RESERVE_MEM:
v.val = sk->sk_reserved_mem;
v.val = READ_ONCE(sk->sk_reserved_mem);
break;

case SO_TXREHASH:
v.val = sk->sk_txrehash;
/* Paired with WRITE_ONCE() in sk_setsockopt() */
v.val = READ_ONCE(sk->sk_txrehash);
break;

default:
Expand Down Expand Up @@ -3168,7 +3179,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);

int sk_set_peek_off(struct sock *sk, int val)
{
sk->sk_peek_off = val;
WRITE_ONCE(sk->sk_peek_off, val);
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
Expand Down
4 changes: 2 additions & 2 deletions net/dccp/ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
sk->sk_priority);
err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
np->tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/inet_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif

if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
goto errout;

if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
Expand Down Expand Up @@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
if (sk_fullsock(sk))
entry.mark = sk->sk_mark;
entry.mark = READ_ONCE(sk->sk_mark);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else if (sk->sk_state == TCP_TIME_WAIT)
Expand Down
8 changes: 4 additions & 4 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
ip_options_build(skb, &opt->opt, daddr, rt);
}

skb->priority = sk->sk_priority;
skb->priority = READ_ONCE(sk->sk_priority);
if (!skb->mark)
skb->mark = sk->sk_mark;
skb->mark = READ_ONCE(sk->sk_mark);

/* Send it out. */
return ip_local_out(net, skb->sk, skb);
Expand Down Expand Up @@ -528,8 +528,8 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
skb_shinfo(skb)->gso_segs ?: 1);

/* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);

res = ip_local_out(net, sk, skb);
rcu_read_unlock();
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/ip_sockglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
}
if (inet_sk(sk)->tos != val) {
inet_sk(sk)->tos = val;
sk->sk_priority = rt_tos2priority(val);
WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);

skb->priority = sk->sk_priority;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct inet_sock *inet = inet_sk(sk);

oif = sk->sk_bound_dev_if;
mark = sk->sk_mark;
mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
Expand Down Expand Up @@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -931,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(ipv4_tcp_sk);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority;
inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/ping.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

ipcm6_init_sk(&ipc6, np);
ipc6.sockc.tsflags = sk->sk_tsflags;
ipc6.sockc.mark = sk->sk_mark;
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);

fl6.flowi6_oif = oif;

Expand Down
Loading

0 comments on commit 37e3cec

Please sign in to comment.