35
35
#include <linux/in.h>
36
36
#include <linux/module.h>
37
37
#include <net/tcp.h>
38
+ #include <net/net_namespace.h>
39
+ #include <net/netns/generic.h>
40
+ #include <net/tcp.h>
38
41
39
42
#include "rds.h"
40
43
#include "tcp.h"
@@ -250,16 +253,7 @@ static void rds_tcp_destroy_conns(void)
250
253
}
251
254
}
252
255
253
- static void rds_tcp_exit (void )
254
- {
255
- rds_info_deregister_func (RDS_INFO_TCP_SOCKETS , rds_tcp_tc_info );
256
- rds_tcp_listen_stop ();
257
- rds_tcp_destroy_conns ();
258
- rds_trans_unregister (& rds_tcp_transport );
259
- rds_tcp_recv_exit ();
260
- kmem_cache_destroy (rds_tcp_conn_slab );
261
- }
262
- module_exit (rds_tcp_exit );
256
+ static void rds_tcp_exit (void );
263
257
264
258
struct rds_transport rds_tcp_transport = {
265
259
.laddr_check = rds_tcp_laddr_check ,
@@ -281,6 +275,136 @@ struct rds_transport rds_tcp_transport = {
281
275
.t_prefer_loopback = 1 ,
282
276
};
283
277
278
+ static int rds_tcp_netid ;
279
+
280
+ /* per-network namespace private data for this module */
281
+ struct rds_tcp_net {
282
+ struct socket * rds_tcp_listen_sock ;
283
+ struct work_struct rds_tcp_accept_w ;
284
+ };
285
+
286
+ static void rds_tcp_accept_worker (struct work_struct * work )
287
+ {
288
+ struct rds_tcp_net * rtn = container_of (work ,
289
+ struct rds_tcp_net ,
290
+ rds_tcp_accept_w );
291
+
292
+ while (rds_tcp_accept_one (rtn -> rds_tcp_listen_sock ) == 0 )
293
+ cond_resched ();
294
+ }
295
+
296
+ void rds_tcp_accept_work (struct sock * sk )
297
+ {
298
+ struct net * net = sock_net (sk );
299
+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
300
+
301
+ queue_work (rds_wq , & rtn -> rds_tcp_accept_w );
302
+ }
303
+
304
+ static __net_init int rds_tcp_init_net (struct net * net )
305
+ {
306
+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
307
+
308
+ rtn -> rds_tcp_listen_sock = rds_tcp_listen_init (net );
309
+ if (!rtn -> rds_tcp_listen_sock ) {
310
+ pr_warn ("could not set up listen sock\n" );
311
+ return - EAFNOSUPPORT ;
312
+ }
313
+ INIT_WORK (& rtn -> rds_tcp_accept_w , rds_tcp_accept_worker );
314
+ return 0 ;
315
+ }
316
+
317
+ static void __net_exit rds_tcp_exit_net (struct net * net )
318
+ {
319
+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
320
+
321
+ /* If rds_tcp_exit_net() is called as a result of netns deletion,
322
+ * the rds_tcp_kill_sock() device notifier would already have cleaned
323
+ * up the listen socket, thus there is no work to do in this function.
324
+ *
325
+ * If rds_tcp_exit_net() is called as a result of module unload,
326
+ * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
327
+ * we do need to clean up the listen socket here.
328
+ */
329
+ if (rtn -> rds_tcp_listen_sock ) {
330
+ rds_tcp_listen_stop (rtn -> rds_tcp_listen_sock );
331
+ rtn -> rds_tcp_listen_sock = NULL ;
332
+ flush_work (& rtn -> rds_tcp_accept_w );
333
+ }
334
+ }
335
+
336
+ static struct pernet_operations rds_tcp_net_ops = {
337
+ .init = rds_tcp_init_net ,
338
+ .exit = rds_tcp_exit_net ,
339
+ .id = & rds_tcp_netid ,
340
+ .size = sizeof (struct rds_tcp_net ),
341
+ };
342
+
343
+ static void rds_tcp_kill_sock (struct net * net )
344
+ {
345
+ struct rds_tcp_connection * tc , * _tc ;
346
+ struct sock * sk ;
347
+ LIST_HEAD (tmp_list );
348
+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
349
+
350
+ rds_tcp_listen_stop (rtn -> rds_tcp_listen_sock );
351
+ rtn -> rds_tcp_listen_sock = NULL ;
352
+ flush_work (& rtn -> rds_tcp_accept_w );
353
+ spin_lock_irq (& rds_tcp_conn_lock );
354
+ list_for_each_entry_safe (tc , _tc , & rds_tcp_conn_list , t_tcp_node ) {
355
+ struct net * c_net = read_pnet (& tc -> conn -> c_net );
356
+
357
+ if (net != c_net || !tc -> t_sock )
358
+ continue ;
359
+ list_move_tail (& tc -> t_tcp_node , & tmp_list );
360
+ }
361
+ spin_unlock_irq (& rds_tcp_conn_lock );
362
+ list_for_each_entry_safe (tc , _tc , & tmp_list , t_tcp_node ) {
363
+ sk = tc -> t_sock -> sk ;
364
+ sk -> sk_prot -> disconnect (sk , 0 );
365
+ tcp_done (sk );
366
+ if (tc -> conn -> c_passive )
367
+ rds_conn_destroy (tc -> conn -> c_passive );
368
+ rds_conn_destroy (tc -> conn );
369
+ }
370
+ }
371
+
372
+ static int rds_tcp_dev_event (struct notifier_block * this ,
373
+ unsigned long event , void * ptr )
374
+ {
375
+ struct net_device * dev = netdev_notifier_info_to_dev (ptr );
376
+
377
+ /* rds-tcp registers as a pernet subys, so the ->exit will only
378
+ * get invoked after network acitivity has quiesced. We need to
379
+ * clean up all sockets to quiesce network activity, and use
380
+ * the unregistration of the per-net loopback device as a trigger
381
+ * to start that cleanup.
382
+ */
383
+ if (event == NETDEV_UNREGISTER_FINAL &&
384
+ dev -> ifindex == LOOPBACK_IFINDEX )
385
+ rds_tcp_kill_sock (dev_net (dev ));
386
+
387
+ return NOTIFY_DONE ;
388
+ }
389
+
390
+ static struct notifier_block rds_tcp_dev_notifier = {
391
+ .notifier_call = rds_tcp_dev_event ,
392
+ .priority = -10 , /* must be called after other network notifiers */
393
+ };
394
+
395
+ static void rds_tcp_exit (void )
396
+ {
397
+ rds_info_deregister_func (RDS_INFO_TCP_SOCKETS , rds_tcp_tc_info );
398
+ unregister_pernet_subsys (& rds_tcp_net_ops );
399
+ if (unregister_netdevice_notifier (& rds_tcp_dev_notifier ))
400
+ pr_warn ("could not unregister rds_tcp_dev_notifier\n" );
401
+ rds_tcp_destroy_conns ();
402
+ rds_trans_unregister (& rds_tcp_transport );
403
+ rds_tcp_recv_exit ();
404
+ kmem_cache_destroy (rds_tcp_conn_slab );
405
+ }
406
+ module_exit (rds_tcp_exit );
407
+
284
408
static int rds_tcp_init (void )
285
409
{
286
410
int ret ;
@@ -293,6 +417,16 @@ static int rds_tcp_init(void)
293
417
goto out ;
294
418
}
295
419
420
+ ret = register_netdevice_notifier (& rds_tcp_dev_notifier );
421
+ if (ret ) {
422
+ pr_warn ("could not register rds_tcp_dev_notifier\n" );
423
+ goto out ;
424
+ }
425
+
426
+ ret = register_pernet_subsys (& rds_tcp_net_ops );
427
+ if (ret )
428
+ goto out_slab ;
429
+
296
430
ret = rds_tcp_recv_init ();
297
431
if (ret )
298
432
goto out_slab ;
@@ -301,19 +435,14 @@ static int rds_tcp_init(void)
301
435
if (ret )
302
436
goto out_recv ;
303
437
304
- ret = rds_tcp_listen_init ();
305
- if (ret )
306
- goto out_register ;
307
-
308
438
rds_info_register_func (RDS_INFO_TCP_SOCKETS , rds_tcp_tc_info );
309
439
310
440
goto out ;
311
441
312
- out_register :
313
- rds_trans_unregister (& rds_tcp_transport );
314
442
out_recv :
315
443
rds_tcp_recv_exit ();
316
444
out_slab :
445
+ unregister_pernet_subsys (& rds_tcp_net_ops );
317
446
kmem_cache_destroy (rds_tcp_conn_slab );
318
447
out :
319
448
return ret ;
0 commit comments