@@ -28,8 +28,11 @@ struct mptcp_pernet {
 #endif
 
	unsigned int add_addr_timeout;
+	unsigned int blackhole_timeout;
	unsigned int close_timeout;
	unsigned int stale_loss_cnt;
+	atomic_t active_disable_times;
+	unsigned long active_disable_stamp;
	u8 mptcp_enabled;
	u8 checksum_enabled;
	u8 allow_join_initial_addr_port;
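Three new per-netns fields carry the blackhole-detection state: blackhole_timeout is the base disable period in seconds (exposed as a sysctl below), active_disable_times counts how often active MPTCP has been disabled (it drives the exponential backoff), and active_disable_stamp records the jiffies value of the most recent disable.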
@@ -88,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
	pernet->mptcp_enabled = 1;
	pernet->add_addr_timeout = TCP_RTO_MAX;
+	pernet->blackhole_timeout = 3600;
+	atomic_set(&pernet->active_disable_times, 0);
	pernet->close_timeout = TCP_TIMEWAIT_LEN;
	pernet->checksum_enabled = 0;
	pernet->allow_join_initial_addr_port = 1;
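The defaults encode the behaviour described in the comment block further down: a 3600 s (1 h) base timeout and a zeroed counter, i.e. active MPTCP starts out enabled. Setting blackhole_timeout to 0 switches the whole mechanism off, as both mptcp_active_disable() and mptcp_active_should_disable() return early on a zero timeout.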
@@ -152,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
	return ret;
 }
 
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+					 int write, void *buffer, size_t *lenp,
+					 loff_t *ppos)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		atomic_set(&pernet->active_disable_times, 0);
+
+	return ret;
+}
+
 static struct ctl_table mptcp_sysctl_table[] = {
	{
		.procname = "enabled",
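A side effect worth calling out: any successful write to the sysctl, even one that stores the same value, resets active_disable_times to zero, so rewriting the timeout doubles as a manual "re-enable active MPTCP now" knob. A minimal userspace sketch, assuming the conventional /proc/sys mount point (the 3600 written here is just the default being stored back):

	#include <stdio.h>

	int main(void)
	{
		/* Re-writing blackhole_timeout (even unchanged) clears
		 * active_disable_times, re-arming active MPTCP at once.
		 */
		FILE *f = fopen("/proc/sys/net/mptcp/blackhole_timeout", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "3600\n");
		return fclose(f) ? 1 : 0;
	}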
@@ -218,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
+	{
+		.procname = "blackhole_timeout",
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_blackhole_detect_timeout,
+		.extra1 = SYSCTL_ZERO,
+	},
 };
 
 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
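Only .extra1 (the minimum, SYSCTL_ZERO) is set and the handler forwards to proc_dointvec_minmax(), so negative values are rejected while there is no upper bound; 0 is the "off" value described above.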
@@ -241,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
	table[6].data = &pernet->scheduler;
	/* table[7] is for available_schedulers which is read-only info */
	table[8].data = &pernet->close_timeout;
+	table[9].data = &pernet->blackhole_timeout;
 
	hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
				     ARRAY_SIZE(mptcp_sysctl_table));
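As with the neighbouring entries, the index is positional: table[9] must line up with the blackhole_timeout slot in mptcp_sysctl_table[], so any entry inserted before it would require renumbering here.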
@@ -278,6 +305,90 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
 
 #endif /* CONFIG_SYSCTL */
 
+/* The following code block is to deal with middlebox issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP globally when SYN+MPC are
+ * dropped, while SYNs without MPC aren't. In this case, active side MPTCP is
+ * disabled globally for 1h at first. Then if it happens again, it is disabled
+ * for 2h, then 4h, 8h, ...
+ * The timeout is reset back to 1h when a successful active MPTCP connection is
+ * fully established.
+ */
+
+/* Disable active MPTCP and record current jiffies and active_disable_times */
+void mptcp_active_disable(struct sock *sk)
+{
+	struct net *net = sock_net(sk);
+	struct mptcp_pernet *pernet;
+
+	pernet = mptcp_get_pernet(net);
+
+	if (!READ_ONCE(pernet->blackhole_timeout))
+		return;
+
+	/* Paired with READ_ONCE() in mptcp_active_should_disable() */
+	WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+	/* Paired with smp_rmb() in mptcp_active_should_disable().
+	 * We want pernet->active_disable_stamp to be updated first.
+	 */
+	smp_mb__before_atomic();
+	atomic_inc(&pernet->active_disable_times);
+
+	MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate timeout for MPTCP active disable
+ * Return true if we are still in the active MPTCP disable period
+ * Return false if timeout already expired and we should use active MPTCP
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+	struct net *net = sock_net(ssk);
+	unsigned int blackhole_timeout;
+	struct mptcp_pernet *pernet;
+	unsigned long timeout;
+	int disable_times;
+	int multiplier;
+
+	pernet = mptcp_get_pernet(net);
+	blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+	if (!blackhole_timeout)
+		return false;
+
+	disable_times = atomic_read(&pernet->active_disable_times);
+	if (!disable_times)
+		return false;
+
+	/* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+	smp_rmb();
+
+	/* Limit timeout to max: 2^6 * initial timeout */
+	multiplier = 1 << min(disable_times - 1, 6);
+
+	/* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
+	timeout = READ_ONCE(pernet->active_disable_stamp) +
+		  multiplier * blackhole_timeout * HZ;
+
+	return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed */
+void mptcp_active_enable(struct sock *sk)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+	if (atomic_read(&pernet->active_disable_times)) {
+		struct dst_entry *dst = sk_dst_get(sk);
+
+		if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+			atomic_set(&pernet->active_disable_times, 0);
+		/* sk_dst_get() took a reference; drop it */
+		dst_release(dst);
+	}
+}
+
 /* Check the number of retransmissions, and fallback to TCP if needed */
 void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
 {
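Taken together, the three helpers implement a TFO-style backoff: each detected drop doubles the disable window, capped at 2^6 = 64 times the base timeout, and the smp_mb__before_atomic()/smp_rmb() pairing guarantees that a reader who observes the bumped counter also observes the fresh stamp. A standalone model of the period arithmetic, not kernel code (HZ and the 3600 s default are stand-ins here):

	#include <stdio.h>

	#define HZ		1000	/* stand-in for the kernel tick rate */
	#define BH_TIMEOUT	3600	/* seconds; the sysctl default */

	static unsigned long disable_period(int disable_times)
	{
		int shift = disable_times - 1;
		/* Limit timeout to max: 2^6 * initial timeout, mirroring
		 * mptcp_active_should_disable()
		 */
		int multiplier = 1 << (shift < 6 ? shift : 6);

		return (unsigned long)multiplier * BH_TIMEOUT * HZ;
	}

	int main(void)
	{
		for (int times = 1; times <= 8; times++)
			printf("drop #%d: active MPTCP off for %luh\n",
			       times, disable_period(times) / (3600UL * HZ));
		return 0;
	}

Running it prints 1, 2, 4, 8, 16, 32, 64, 64 hours: the window stops growing after the sixth drop.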
@@ -290,10 +401,14 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
	timeouts = inet_csk(ssk)->icsk_retransmits;
	subflow = mptcp_subflow_ctx(ssk);
 
-	if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT &&
-	    (timeouts == 2 || (timeouts < 2 && expired))) {
-		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
-		mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+	if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
+		if (timeouts == 2 || (timeouts < 2 && expired)) {
+			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+			subflow->mpc_drop = 1;
+			mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+		} else {
+			subflow->mpc_drop = 0;
+		}
	}
 }
 
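The refactored condition keeps the existing fallback trigger (the second SYN retransmission, or an expiry before that) and additionally records the verdict in the new subflow->mpc_drop bit: 1 when the MP_CAPABLE SYN is presumed dropped, 0 when the handshake failed for another reason. Note that the three helpers above are defined but never called in this file; the call sites that consult mpc_drop and flip the global state live in the companion changes to the rest of the MPTCP code.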