Skip to content

Commit 27069e7

Browse files
matttbe authored and kuba-moo committed
mptcp: disable active MPTCP in case of blackhole
An MPTCP firewall blackhole can be detected if the following SYN retransmission after a fallback to "plain" TCP is accepted. In case of blackhole, a similar technique to the one in place with TFO is now used: MPTCP can be disabled for a certain period of time, 1h by default. This time period will grow exponentially when more blackhole issues get detected right after MPTCP is re-enabled and will reset to the initial value when the blackhole issue goes away. The blackhole period can be modified thanks to a new sysctl knob: blackhole_timeout. Two new MIB counters help to understand what's happening: - 'Blackhole', incremented when a blackhole is detected. - 'MPCapableSYNTXDisabled', incremented when an MPTCP connection directly falls back to TCP during the blackhole period. Because the technique is inspired by the one used by TFO, an important part of the new code is similar to what can be found in tcp_fastopen.c, with some adaptations to the MPTCP case. Closes: multipath-tcp/mptcp_net-next#57 Signed-off-by: Matthieu Baerts (NGI0) <[email protected]> Link: https://patch.msgid.link/20240909-net-next-mptcp-fallback-x-mpc-v1-3-da7ebb4cd2a3@kernel.org Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 6982826 commit 27069e7

File tree

7 files changed

+151
-8
lines changed

7 files changed

+151
-8
lines changed

Documentation/networking/mptcp-sysctl.rst

+11
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ available_schedulers - STRING
3434
Shows the available schedulers choices that are registered. More packet
3535
schedulers may be available, but not loaded.
3636

37+
blackhole_timeout - INTEGER (seconds)
38+
Initial time period in seconds to disable MPTCP on active MPTCP sockets
39+
when an MPTCP firewall blackhole issue happens. This time period will
40+
grow exponentially when more blackhole issues get detected right after
41+
MPTCP is re-enabled and will reset to the initial value when the
42+
blackhole issue goes away.
43+
44+
0 to disable the blackhole detection.
45+
46+
Default: 3600
47+
3748
checksum_enabled - BOOLEAN
3849
Control whether DSS checksum can be enabled.
3950

net/mptcp/ctrl.c

+117-4
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@ struct mptcp_pernet {
2828
#endif
2929

3030
unsigned int add_addr_timeout;
31+
unsigned int blackhole_timeout;
3132
unsigned int close_timeout;
3233
unsigned int stale_loss_cnt;
34+
atomic_t active_disable_times;
35+
unsigned long active_disable_stamp;
3336
u8 mptcp_enabled;
3437
u8 checksum_enabled;
3538
u8 allow_join_initial_addr_port;
@@ -88,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
8891
{
8992
pernet->mptcp_enabled = 1;
9093
pernet->add_addr_timeout = TCP_RTO_MAX;
94+
pernet->blackhole_timeout = 3600;
95+
atomic_set(&pernet->active_disable_times, 0);
9196
pernet->close_timeout = TCP_TIMEWAIT_LEN;
9297
pernet->checksum_enabled = 0;
9398
pernet->allow_join_initial_addr_port = 1;
@@ -152,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
152157
return ret;
153158
}
154159

160+
static int proc_blackhole_detect_timeout(const struct ctl_table *table,
161+
int write, void *buffer, size_t *lenp,
162+
loff_t *ppos)
163+
{
164+
struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
165+
int ret;
166+
167+
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
168+
if (write && ret == 0)
169+
atomic_set(&pernet->active_disable_times, 0);
170+
171+
return ret;
172+
}
173+
155174
static struct ctl_table mptcp_sysctl_table[] = {
156175
{
157176
.procname = "enabled",
@@ -218,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
218237
.mode = 0644,
219238
.proc_handler = proc_dointvec_jiffies,
220239
},
240+
{
241+
.procname = "blackhole_timeout",
242+
.maxlen = sizeof(unsigned int),
243+
.mode = 0644,
244+
.proc_handler = proc_blackhole_detect_timeout,
245+
.extra1 = SYSCTL_ZERO,
246+
},
221247
};
222248

223249
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -241,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
241267
table[6].data = &pernet->scheduler;
242268
/* table[7] is for available_schedulers which is read-only info */
243269
table[8].data = &pernet->close_timeout;
270+
table[9].data = &pernet->blackhole_timeout;
244271

245272
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
246273
ARRAY_SIZE(mptcp_sysctl_table));
@@ -278,6 +305,88 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
278305

279306
#endif /* CONFIG_SYSCTL */
280307

308+
/* The following code block is to deal with middlebox issues with MPTCP,
309+
* similar to what is done with TFO.
310+
* The proposed solution is to disable active MPTCP globally when SYN+MPC are
311+
* dropped, while SYN without MPC aren't. In this case, active side MPTCP is
312+
* disabled globally for 1h at first. Then if it happens again, it is disabled
313+
* for 2h, then 4h, 8h, ...
314+
* The timeout is reset back to 1h when a successful active MPTCP connection is
315+
* fully established.
316+
*/
317+
318+
/* Disable active MPTCP and record current jiffies and active_disable_times */
319+
void mptcp_active_disable(struct sock *sk)
320+
{
321+
struct net *net = sock_net(sk);
322+
struct mptcp_pernet *pernet;
323+
324+
pernet = mptcp_get_pernet(net);
325+
326+
if (!READ_ONCE(pernet->blackhole_timeout))
327+
return;
328+
329+
/* Paired with READ_ONCE() in mptcp_active_should_disable() */
330+
WRITE_ONCE(pernet->active_disable_stamp, jiffies);
331+
332+
/* Paired with smp_rmb() in mptcp_active_should_disable().
333+
* We want pernet->active_disable_stamp to be updated first.
334+
*/
335+
smp_mb__before_atomic();
336+
atomic_inc(&pernet->active_disable_times);
337+
338+
MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
339+
}
340+
341+
/* Calculate timeout for MPTCP active disable
342+
* Return true if we are still in the active MPTCP disable period
343+
* Return false if timeout already expired and we should use active MPTCP
344+
*/
345+
bool mptcp_active_should_disable(struct sock *ssk)
346+
{
347+
struct net *net = sock_net(ssk);
348+
unsigned int blackhole_timeout;
349+
struct mptcp_pernet *pernet;
350+
unsigned long timeout;
351+
int disable_times;
352+
int multiplier;
353+
354+
pernet = mptcp_get_pernet(net);
355+
blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
356+
357+
if (!blackhole_timeout)
358+
return false;
359+
360+
disable_times = atomic_read(&pernet->active_disable_times);
361+
if (!disable_times)
362+
return false;
363+
364+
/* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
365+
smp_rmb();
366+
367+
/* Limit timeout to max: 2^6 * initial timeout */
368+
multiplier = 1 << min(disable_times - 1, 6);
369+
370+
/* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
371+
timeout = READ_ONCE(pernet->active_disable_stamp) +
372+
multiplier * blackhole_timeout * HZ;
373+
374+
return time_before(jiffies, timeout);
375+
}
376+
377+
/* Enable active MPTCP and reset active_disable_times if needed */
378+
void mptcp_active_enable(struct sock *sk)
379+
{
380+
struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
381+
382+
if (atomic_read(&pernet->active_disable_times)) {
383+
struct dst_entry *dst = sk_dst_get(sk);
384+
385+
if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
386+
atomic_set(&pernet->active_disable_times, 0);
387+
}
388+
}
389+
281390
/* Check the number of retransmissions, and fallback to TCP if needed */
282391
void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
283392
{
@@ -290,10 +399,14 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
290399
timeouts = inet_csk(ssk)->icsk_retransmits;
291400
subflow = mptcp_subflow_ctx(ssk);
292401

293-
if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT &&
294-
(timeouts == 2 || (timeouts < 2 && expired))) {
295-
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
296-
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
402+
if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
403+
if (timeouts == 2 || (timeouts < 2 && expired)) {
404+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
405+
subflow->mpc_drop = 1;
406+
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
407+
} else {
408+
subflow->mpc_drop = 0;
409+
}
297410
}
298411
}
299412

net/mptcp/mib.c

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
1616
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
1717
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
1818
SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
19+
SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
1920
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
2021
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
2122
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -74,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
7475
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
7576
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
7677
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
78+
SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
7779
SNMP_MIB_SENTINEL
7880
};
7981

net/mptcp/mib.h

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ enum linux_mptcp_mib_field {
1111
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
1212
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
1313
MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
14+
MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
1415
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
1516
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
1617
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -75,6 +76,7 @@ enum linux_mptcp_mib_field {
7576
*/
7677
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
7778
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
79+
MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
7880
__MPTCP_MIB_MAX
7981
};
8082

net/mptcp/protocol.c

+8-3
Original file line numberDiff line numberDiff line change
@@ -3737,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
37373737
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
37383738
mptcp_subflow_early_fallback(msk, subflow);
37393739
#endif
3740-
if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
3741-
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
3742-
mptcp_subflow_early_fallback(msk, subflow);
3740+
if (subflow->request_mptcp) {
3741+
if (mptcp_active_should_disable(sk)) {
3742+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
3743+
mptcp_subflow_early_fallback(msk, subflow);
3744+
} else if (mptcp_token_new_connect(ssk) < 0) {
3745+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
3746+
mptcp_subflow_early_fallback(msk, subflow);
3747+
}
37433748
}
37443749

37453750
WRITE_ONCE(msk->write_seq, subflow->idsn);

net/mptcp/protocol.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,8 @@ struct mptcp_subflow_context {
531531
valid_csum_seen : 1, /* at least one csum validated */
532532
is_mptfo : 1, /* subflow is doing TFO */
533533
close_event_done : 1, /* has done the post-closed part */
534-
__unused : 9;
534+
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
535+
__unused : 8;
535536
bool data_avail;
536537
bool scheduled;
537538
u32 remote_nonce;
@@ -697,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
697698
unsigned int mptcp_close_timeout(const struct sock *sk);
698699
int mptcp_get_pm_type(const struct net *net);
699700
const char *mptcp_get_scheduler(const struct net *net);
701+
702+
void mptcp_active_disable(struct sock *sk);
703+
bool mptcp_active_should_disable(struct sock *ssk);
704+
void mptcp_active_enable(struct sock *sk);
705+
700706
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
701707
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
702708
struct mptcp_subflow_context *subflow,

net/mptcp/subflow.c

+4
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
546546
subflow->mp_capable = 1;
547547
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
548548
mptcp_finish_connect(sk);
549+
mptcp_active_enable(parent);
549550
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
550551
} else if (subflow->request_join) {
551552
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
591592
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
592593
}
593594
} else if (mptcp_check_fallback(sk)) {
595+
/* It looks like MPTCP is blocked, while TCP is not */
596+
if (subflow->mpc_drop)
597+
mptcp_active_disable(parent);
594598
fallback:
595599
mptcp_propagate_state(parent, sk, subflow, NULL);
596600
}

0 commit comments

Comments
 (0)