Skip to content

Commit

Permalink
Revert "net: Add a second bind table hashed by port and address"
Browse files Browse the repository at this point in the history
This reverts:

commit d5a42de ("net: Add a second bind table hashed by port and address")
commit 538aaf9 ("selftests: Add test for timing a bind request to a port with a populated bhash entry")
Link: https://lore.kernel.org/netdev/[email protected]/

There are a few things that need to be fixed here:
* Updating bhash2 in cases where the socket's rcv saddr changes
* Adding bhash2 hashbucket locks

Links to syzbot reports:
https://lore.kernel.org/netdev/[email protected]/
https://lore.kernel.org/netdev/[email protected]/

Fixes: d5a42de ("net: Add a second bind table hashed by port and address")
Reported-by: [email protected]
Reported-by: [email protected]
Reported-by: [email protected]
Signed-off-by: Joanne Koong <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
  • Loading branch information
joannekoong authored and kuba-moo committed Jun 16, 2022
1 parent 219b51a commit 593d1eb
Show file tree
Hide file tree
Showing 10 changed files with 83 additions and 611 deletions.
3 changes: 0 additions & 3 deletions include/net/inet_connection_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#undef INET_CSK_CLEAR_TIMERS

struct inet_bind_bucket;
struct inet_bind2_bucket;
struct tcp_congestion_ops;

/*
Expand Down Expand Up @@ -58,7 +57,6 @@ struct inet_connection_sock_af_ops {
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
Expand All @@ -85,7 +83,6 @@ struct inet_connection_sock {
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
Expand Down
68 changes: 1 addition & 67 deletions include/net/inet_hashtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,32 +90,11 @@ struct inet_bind_bucket {
struct hlist_head owners;
};

struct inet_bind2_bucket {
possible_net_t ib_net;
int l3mdev;
unsigned short port;
union {
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr v6_rcv_saddr;
#endif
__be32 rcv_saddr;
};
/* Node in the inet2_bind_hashbucket chain */
struct hlist_node node;
/* List of sockets hashed to this bucket */
struct hlist_head owners;
};

static inline struct net *ib_net(struct inet_bind_bucket *ib)
{
return read_pnet(&ib->ib_net);
}

static inline struct net *ib2_net(struct inet_bind2_bucket *ib)
{
return read_pnet(&ib->ib_net);
}

#define inet_bind_bucket_for_each(tb, head) \
hlist_for_each_entry(tb, head, node)

Expand All @@ -124,15 +103,6 @@ struct inet_bind_hashbucket {
struct hlist_head chain;
};

/* This is synchronized using the inet_bind_hashbucket's spinlock.
* Instead of having separate spinlocks, the inet_bind2_hashbucket can share
* the inet_bind_hashbucket's given that in every case where the bhash2 table
* is useful, a lookup in the bhash table also occurs.
*/
struct inet_bind2_hashbucket {
struct hlist_head chain;
};

/* Sockets can be hashed in established or listening table.
* We must use different 'nulls' end-of-chain value for all hash buckets :
* A socket might transition from ESTABLISH to LISTEN state without
Expand Down Expand Up @@ -164,12 +134,6 @@ struct inet_hashinfo {
*/
struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
/* The 2nd binding table hashed by port and address.
* This is used primarily for expediting the resolution of bind
* conflicts.
*/
struct kmem_cache *bind2_bucket_cachep;
struct inet_bind2_hashbucket *bhash2;
unsigned int bhash_size;

/* The 2nd listener table hashed by local port and address */
Expand Down Expand Up @@ -229,44 +193,14 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);

static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,
struct net *net,
const unsigned short port,
int l3mdev)
{
return net_eq(ib_net(tb), net) && tb->port == port &&
tb->l3mdev == l3mdev;
}

struct inet_bind2_bucket *
inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind2_hashbucket *head,
const unsigned short port, int l3mdev,
const struct sock *sk);

void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind2_bucket *tb);

struct inet_bind2_bucket *
inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
const unsigned short port, int l3mdev,
struct sock *sk,
struct inet_bind2_hashbucket **head);

bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
struct net *net,
const unsigned short port,
int l3mdev,
const struct sock *sk);

static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
return (lport + net_hash_mix(net)) & (bhash_size - 1);
}

void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
struct inet_bind2_bucket *tb2, const unsigned short snum);
const unsigned short snum);

/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
Expand Down
14 changes: 0 additions & 14 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,6 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
* @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
Expand Down Expand Up @@ -538,7 +537,6 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
struct hlist_node sk_bind2_node;
};

enum sk_pacing {
Expand Down Expand Up @@ -819,16 +817,6 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}

static inline void __sk_del_bind2_node(struct sock *sk)
{
__hlist_del(&sk->sk_bind2_node);
}

static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
{
hlist_add_head(&sk->sk_bind2_node, list);
}

#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
Expand All @@ -846,8 +834,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
#define sk_for_each_bound_bhash2(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind2_node)

/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
Expand Down
33 changes: 5 additions & 28 deletions net/dccp/proto.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
dccp_hashinfo.bind2_bucket_cachep =
kmem_cache_create("dccp_bind2_bucket",
sizeof(struct inet_bind2_bucket), 0,
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind2_bucket_cachep)
goto out_free_bind_bucket_cachep;

/*
* Size and allocate the main established and bind bucket
Expand Down Expand Up @@ -1156,7 +1150,7 @@ static int __init dccp_init(void)

if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
goto out_free_bind2_bucket_cachep;
goto out_free_bind_bucket_cachep;
}

for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
Expand All @@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}

dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
__get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);

if (!dccp_hashinfo.bhash2) {
DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
goto out_free_dccp_bhash;
}

for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}

rc = dccp_mib_init();
if (rc)
goto out_free_dccp_bhash2;
goto out_free_dccp_bhash;

rc = dccp_ackvec_init();
if (rc)
Expand All @@ -1222,38 +1207,30 @@ static int __init dccp_init(void)
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
out_free_dccp_bhash2:
free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind2_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}

static void __exit dccp_fini(void)
{
int bhash_order = get_order(dccp_hashinfo.bhash_size *
sizeof(struct inet_bind_hashbucket));

ccid_cleanup_builtins();
dccp_mib_exit();
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
free_pages((unsigned long)dccp_hashinfo.bhash,
get_order(dccp_hashinfo.bhash_size *
sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
Expand Down
Loading

0 comments on commit 593d1eb

Please sign in to comment.