
Commit b713d00

Paolo Abeni authored and kuba-moo committed
mptcp: really share subflow snd_wnd
As per the RFC, MPTCP subflows use a "shared" snd_wnd: the effective window is the maximum among the current values received on all subflows. Without such a feature, a data transfer using multiple subflows could block.

Window sharing is currently implemented on the RX side: __tcp_select_window uses the MPTCP-level receive buffer to compute the announced window.

That is not enough: the TCP stack will stick to the window size received on the given subflow; we need to propagate the msk window value to each subflow at xmit time.

Change the packet scheduler to ignore the subflow-level window and use the msk-level one instead.

Signed-off-by: Paolo Abeni <[email protected]>
Signed-off-by: Mat Martineau <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
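As a rough illustration of the behavior the commit message describes, here is a minimal user-space sketch of the clamp: the amount a subflow may send is limited by what remains of the connection-level (msk) window, and the subflow's TCP-level snd_wnd is lifted up to that shared value so the stack does not stall on a smaller per-subflow window. The types and helpers below (msk_state, subflow_state, min_u64, check_allowed_size) are simplified stand-ins for the kernel's mptcp_sock/tcp_sock state, not the actual kernel API.

/* Simplified sketch of the msk-level send window clamp; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct msk_state {            /* MPTCP-level (connection) state */
        uint64_t snd_nxt;     /* next MPTCP sequence to send */
        uint64_t wnd_end;     /* end of the shared send window */
};

struct subflow_state {        /* TCP-level (per-subflow) state */
        uint32_t snd_wnd;     /* window last announced by the peer on this subflow */
};

static uint64_t min_u64(uint64_t a, uint64_t b)
{
        return a < b ? a : b;
}

/* Clamp @avail_size to what the MPTCP-level window still allows and, as in
 * the patch, lift the subflow's snd_wnd up to the shared value so the TCP
 * stack does not stick to a smaller per-subflow window.
 */
static unsigned int check_allowed_size(const struct msk_state *msk,
                                       struct subflow_state *ssk,
                                       uint64_t data_seq,
                                       unsigned int avail_size)
{
        uint64_t mptcp_snd_wnd = msk->wnd_end - data_seq;

        avail_size = (unsigned int)min_u64(mptcp_snd_wnd, avail_size);

        if (ssk->snd_wnd < mptcp_snd_wnd)
                ssk->snd_wnd = (uint32_t)min_u64(UINT32_MAX, mptcp_snd_wnd);

        return avail_size;
}

int main(void)
{
        struct msk_state msk = { .snd_nxt = 1000, .wnd_end = 1000 + 65536 };
        struct subflow_state ssk = { .snd_wnd = 4096 }; /* subflow saw a small window */

        unsigned int copy = check_allowed_size(&msk, &ssk, msk.snd_nxt, 16384);

        /* copy stays 16384 (it fits in the shared window) and the subflow's
         * snd_wnd is raised to 65536, the msk-level value.
         */
        printf("copy=%u subflow snd_wnd=%u\n", copy, ssk.snd_wnd);
        return 0;
}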
1 parent 10b4a11 · commit b713d00

File tree: 1 file changed, +15 −9 lines changed

net/mptcp/protocol.c

Lines changed: 15 additions & 9 deletions
@@ -1141,19 +1141,20 @@ struct mptcp_sendmsg_info {
 	bool data_lock_held;
 };
 
-static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
-				    int avail_size)
+static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
+				    u64 data_seq, int avail_size)
 {
 	u64 window_end = mptcp_wnd_end(msk);
+	u64 mptcp_snd_wnd;
 
 	if (__mptcp_check_fallback(msk))
 		return avail_size;
 
-	if (!before64(data_seq + avail_size, window_end)) {
-		u64 allowed_size = window_end - data_seq;
+	mptcp_snd_wnd = window_end - data_seq;
+	avail_size = min_t(unsigned int, mptcp_snd_wnd, avail_size);
 
-		return min_t(unsigned int, allowed_size, avail_size);
-	}
+	if (unlikely(tcp_sk(ssk)->snd_wnd < mptcp_snd_wnd))
+		tcp_sk(ssk)->snd_wnd = min_t(u64, U32_MAX, mptcp_snd_wnd);
 
 	return avail_size;
 }
@@ -1305,7 +1306,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	}
 
 	/* Zero window and all data acked? Probe. */
-	copy = mptcp_check_allowed_size(msk, data_seq, copy);
+	copy = mptcp_check_allowed_size(msk, ssk, data_seq, copy);
 	if (copy == 0) {
 		u64 snd_una = READ_ONCE(msk->snd_una);
 
@@ -1498,11 +1499,16 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 	 * to check that subflow has a non empty cwin.
 	 */
 	ssk = send_info[SSK_MODE_ACTIVE].ssk;
-	if (!ssk || !sk_stream_memory_free(ssk) || !tcp_sk(ssk)->snd_wnd)
+	if (!ssk || !sk_stream_memory_free(ssk))
 		return NULL;
 
-	burst = min_t(int, MPTCP_SEND_BURST_SIZE, tcp_sk(ssk)->snd_wnd);
+	burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
 	wmem = READ_ONCE(ssk->sk_wmem_queued);
+	if (!burst) {
+		msk->last_snd = NULL;
+		return ssk;
+	}
+
 	subflow = mptcp_subflow_ctx(ssk);
 	subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
 					   READ_ONCE(ssk->sk_pacing_rate) * burst,