Skip to content

Commit 5ccbf89

Browse files
committed
Pablo Neira Ayuso says: ==================== IPVS updates for v5.5 1) Two patches to speed up ipvs netns dismantle, from Haishuang Yan. 2) Three patches to add selftest script for ipvs, also from Haishuang Yan. 3) Simplify __ip_vs_get_out_rt() from zhang kai. ==================== Signed-off-by: Pablo Neira Ayuso <[email protected]>
2 parents 63f55ac + 176a520 commit 5ccbf89

File tree

6 files changed

+273
-36
lines changed

6 files changed

+273
-36
lines changed

include/net/ip_vs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1324,7 +1324,7 @@ void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs);
13241324
void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
13251325
void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
13261326
void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
1327-
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
1327+
void ip_vs_service_nets_cleanup(struct list_head *net_list);
13281328

13291329
/* IPVS application functions
13301330
* (from ip_vs_app.c)

net/netfilter/ipvs/ip_vs_core.c

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2402,18 +2402,22 @@ static int __net_init __ip_vs_init(struct net *net)
24022402
return -ENOMEM;
24032403
}
24042404

2405-
static void __net_exit __ip_vs_cleanup(struct net *net)
2405+
static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
24062406
{
2407-
struct netns_ipvs *ipvs = net_ipvs(net);
2408-
2409-
ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
2410-
ip_vs_conn_net_cleanup(ipvs);
2411-
ip_vs_app_net_cleanup(ipvs);
2412-
ip_vs_protocol_net_cleanup(ipvs);
2413-
ip_vs_control_net_cleanup(ipvs);
2414-
ip_vs_estimator_net_cleanup(ipvs);
2415-
IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
2416-
net->ipvs = NULL;
2407+
struct netns_ipvs *ipvs;
2408+
struct net *net;
2409+
2410+
ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */
2411+
list_for_each_entry(net, net_list, exit_list) {
2412+
ipvs = net_ipvs(net);
2413+
ip_vs_conn_net_cleanup(ipvs);
2414+
ip_vs_app_net_cleanup(ipvs);
2415+
ip_vs_protocol_net_cleanup(ipvs);
2416+
ip_vs_control_net_cleanup(ipvs);
2417+
ip_vs_estimator_net_cleanup(ipvs);
2418+
IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
2419+
net->ipvs = NULL;
2420+
}
24172421
}
24182422

24192423
static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2429,27 +2433,32 @@ static int __net_init __ip_vs_dev_init(struct net *net)
24292433
return ret;
24302434
}
24312435

2432-
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
2436+
static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
24332437
{
2434-
struct netns_ipvs *ipvs = net_ipvs(net);
2438+
struct netns_ipvs *ipvs;
2439+
struct net *net;
2440+
24352441
EnterFunction(2);
2436-
nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
2437-
ipvs->enable = 0; /* Disable packet reception */
2438-
smp_wmb();
2439-
ip_vs_sync_net_cleanup(ipvs);
2442+
list_for_each_entry(net, net_list, exit_list) {
2443+
ipvs = net_ipvs(net);
2444+
nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
2445+
ipvs->enable = 0; /* Disable packet reception */
2446+
smp_wmb();
2447+
ip_vs_sync_net_cleanup(ipvs);
2448+
}
24402449
LeaveFunction(2);
24412450
}
24422451

24432452
static struct pernet_operations ipvs_core_ops = {
24442453
.init = __ip_vs_init,
2445-
.exit = __ip_vs_cleanup,
2454+
.exit_batch = __ip_vs_cleanup_batch,
24462455
.id = &ip_vs_net_id,
24472456
.size = sizeof(struct netns_ipvs),
24482457
};
24492458

24502459
static struct pernet_operations ipvs_core_dev_ops = {
24512460
.init = __ip_vs_dev_init,
2452-
.exit = __ip_vs_dev_cleanup,
2461+
.exit_batch = __ip_vs_dev_cleanup_batch,
24532462
};
24542463

24552464
/*

net/netfilter/ipvs/ip_vs_ctl.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
16071607

16081608
/*
16091609
* Delete service by {netns} in the service table.
1610-
* Called by __ip_vs_cleanup()
1610+
* Called by __ip_vs_cleanup_batch()
16111611
*/
1612-
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
1612+
void ip_vs_service_nets_cleanup(struct list_head *net_list)
16131613
{
1614+
struct netns_ipvs *ipvs;
1615+
struct net *net;
1616+
16141617
EnterFunction(2);
16151618
/* Check for "full" addressed entries */
16161619
mutex_lock(&__ip_vs_mutex);
1617-
ip_vs_flush(ipvs, true);
1620+
list_for_each_entry(net, net_list, exit_list) {
1621+
ipvs = net_ipvs(net);
1622+
ip_vs_flush(ipvs, true);
1623+
}
16181624
mutex_unlock(&__ip_vs_mutex);
16191625
LeaveFunction(2);
16201626
}

net/netfilter/ipvs/ip_vs_xmit.c

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -407,12 +407,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
407407
goto err_put;
408408

409409
skb_dst_drop(skb);
410-
if (noref) {
411-
if (!local)
412-
skb_dst_set_noref(skb, &rt->dst);
413-
else
414-
skb_dst_set(skb, dst_clone(&rt->dst));
415-
} else
410+
if (noref)
411+
skb_dst_set_noref(skb, &rt->dst);
412+
else
416413
skb_dst_set(skb, &rt->dst);
417414

418415
return local;
@@ -574,12 +571,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
574571
goto err_put;
575572

576573
skb_dst_drop(skb);
577-
if (noref) {
578-
if (!local)
579-
skb_dst_set_noref(skb, &rt->dst);
580-
else
581-
skb_dst_set(skb, dst_clone(&rt->dst));
582-
} else
574+
if (noref)
575+
skb_dst_set_noref(skb, &rt->dst);
576+
else
583577
skb_dst_set(skb, &rt->dst);
584578

585579
return local;

tools/testing/selftests/netfilter/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# Makefile for netfilter selftests
33

44
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
5-
conntrack_icmp_related.sh nft_flowtable.sh
5+
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
66

77
include ../lib.mk
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
#!/bin/sh
2+
# SPDX-License-Identifier: GPL-2.0
3+
#
4+
# End-to-end ipvs test suite
5+
# Topology:
6+
#--------------------------------------------------------------+
7+
# | |
8+
# ns0 | ns1 |
9+
# ----------- | ----------- ----------- |
10+
# | veth01 | --------- | veth10 | | veth12 | |
11+
# ----------- peer ----------- ----------- |
12+
# | | | |
13+
# ----------- | | |
14+
# | br0 | |----------------- peer |--------------|
15+
# ----------- | | |
16+
# | | | |
17+
# ---------- peer ---------- ----------- |
18+
# | veth02 | --------- | veth20 | | veth21 | |
19+
# ---------- | ---------- ----------- |
20+
# | ns2 |
21+
# | |
22+
#--------------------------------------------------------------+
23+
#
24+
# We assume that all network drivers are loaded
25+
#
26+
27+
# Kselftest framework requirement - SKIP code is 4.
28+
ksft_skip=4
29+
ret=0
30+
GREEN='\033[0;92m'
31+
RED='\033[0;31m'
32+
NC='\033[0m' # No Color
33+
34+
readonly port=8080
35+
36+
readonly vip_v4=207.175.44.110
37+
readonly cip_v4=10.0.0.2
38+
readonly gip_v4=10.0.0.1
39+
readonly dip_v4=172.16.0.1
40+
readonly rip_v4=172.16.0.2
41+
readonly sip_v4=10.0.0.3
42+
43+
readonly infile="$(mktemp)"
44+
readonly outfile="$(mktemp)"
45+
readonly datalen=32
46+
47+
sysipvsnet="/proc/sys/net/ipv4/vs/"
48+
if [ ! -d $sysipvsnet ]; then
49+
modprobe -q ip_vs
50+
if [ $? -ne 0 ]; then
51+
echo "skip: could not run test without ipvs module"
52+
exit $ksft_skip
53+
fi
54+
fi
55+
56+
ip -Version > /dev/null 2>&1
57+
if [ $? -ne 0 ]; then
58+
echo "SKIP: Could not run test without ip tool"
59+
exit $ksft_skip
60+
fi
61+
62+
ipvsadm -v > /dev/null 2>&1
63+
if [ $? -ne 0 ]; then
64+
echo "SKIP: Could not run test without ipvsadm"
65+
exit $ksft_skip
66+
fi
67+
68+
setup() {
69+
ip netns add ns0
70+
ip netns add ns1
71+
ip netns add ns2
72+
73+
ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
74+
ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
75+
ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
76+
77+
ip netns exec ns0 ip link set veth01 up
78+
ip netns exec ns0 ip link set veth02 up
79+
ip netns exec ns0 ip link add br0 type bridge
80+
ip netns exec ns0 ip link set veth01 master br0
81+
ip netns exec ns0 ip link set veth02 master br0
82+
ip netns exec ns0 ip link set br0 up
83+
ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
84+
85+
ip netns exec ns1 ip link set lo up
86+
ip netns exec ns1 ip link set veth10 up
87+
ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
88+
ip netns exec ns1 ip link set veth12 up
89+
ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
90+
91+
ip netns exec ns2 ip link set lo up
92+
ip netns exec ns2 ip link set veth21 up
93+
ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
94+
ip netns exec ns2 ip link set veth20 up
95+
ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
96+
97+
sleep 1
98+
99+
dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
100+
}
101+
102+
cleanup() {
103+
for i in 0 1 2
104+
do
105+
ip netns del ns$i > /dev/null 2>&1
106+
done
107+
108+
if [ -f "${outfile}" ]; then
109+
rm "${outfile}"
110+
fi
111+
if [ -f "${infile}" ]; then
112+
rm "${infile}"
113+
fi
114+
}
115+
116+
server_listen() {
117+
ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
118+
server_pid=$!
119+
sleep 0.2
120+
}
121+
122+
client_connect() {
123+
ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
124+
}
125+
126+
verify_data() {
127+
wait "${server_pid}"
128+
cmp "$infile" "$outfile" 2>/dev/null
129+
}
130+
131+
test_service() {
132+
server_listen
133+
client_connect
134+
verify_data
135+
}
136+
137+
138+
test_dr() {
139+
ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
140+
141+
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
142+
ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
143+
ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port}
144+
ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
145+
146+
# avoid incorrect arp response
147+
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
148+
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
149+
# avoid reverse route lookup
150+
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
151+
ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
152+
ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
153+
154+
test_service
155+
}
156+
157+
test_nat() {
158+
ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
159+
160+
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
161+
ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
162+
ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
163+
ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
164+
165+
ip netns exec ns2 ip link del veth20
166+
ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
167+
168+
test_service
169+
}
170+
171+
test_tun() {
172+
ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
173+
174+
ip netns exec ns1 modprobe ipip
175+
ip netns exec ns1 ip link set tunl0 up
176+
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
177+
ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
178+
ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
179+
ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
180+
ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
181+
ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
182+
183+
ip netns exec ns2 modprobe ipip
184+
ip netns exec ns2 ip link set tunl0 up
185+
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
186+
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
187+
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
188+
ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
189+
ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
190+
ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
191+
192+
test_service
193+
}
194+
195+
run_tests() {
196+
local errors=
197+
198+
echo "Testing DR mode..."
199+
cleanup
200+
setup
201+
test_dr
202+
errors=$(( $errors + $? ))
203+
204+
echo "Testing NAT mode..."
205+
cleanup
206+
setup
207+
test_nat
208+
errors=$(( $errors + $? ))
209+
210+
echo "Testing Tunnel mode..."
211+
cleanup
212+
setup
213+
test_tun
214+
errors=$(( $errors + $? ))
215+
216+
return $errors
217+
}
218+
219+
trap cleanup EXIT
220+
221+
run_tests
222+
223+
if [ $? -ne 0 ]; then
224+
echo -e "$(basename $0): ${RED}FAIL${NC}"
225+
exit 1
226+
fi
227+
echo -e "$(basename $0): ${GREEN}PASS${NC}"
228+
exit 0

0 commit comments

Comments
 (0)