@@ -664,10 +664,12 @@ EXPORT_IPV6_MOD(tcp_initialize_rcv_mss);
  */
 static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 {
-	u32 new_sample = tp->rcv_rtt_est.rtt_us;
-	long m = sample;
+	u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us;
+	long m = sample << 3;

-	if (new_sample != 0) {
+	if (old_sample == 0 || m < old_sample) {
+		new_sample = m;
+	} else {
 		/* If we sample in larger samples in the non-timestamp
 		 * case, we could grossly overestimate the RTT especially
 		 * with chatty applications or bulk transfer apps which
@@ -678,17 +680,12 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		 * else with timestamps disabled convergence takes too
 		 * long.
 		 */
-		if (!win_dep) {
-			m -= (new_sample >> 3);
-			new_sample += m;
-		} else {
-			m <<= 3;
-			if (m < new_sample)
-				new_sample = m;
-		}
-	} else {
-		/* No previous measure. */
-		new_sample = m << 3;
+		if (win_dep)
+			return;
+		/* Do not use this sample if receive queue is not empty. */
+		if (tp->rcv_nxt != tp->copied_seq)
+			return;
+		new_sample = old_sample - (old_sample >> 3) + sample;
 	}

 	tp->rcv_rtt_est.rtt_us = new_sample;
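The rewritten estimator keeps rcv_rtt_est.rtt_us scaled by 8: a first sample or a new minimum replaces the estimate outright, other window-dependent samples are discarded, and timestamp-based samples only feed a gain-1/8 moving average while the receive queue is drained. A minimal userspace sketch of that arithmetic (the harness and the rcvq_empty flag are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* Model of the patched tcp_rcv_rtt_update(): est is rtt_us scaled by 8. */
static uint32_t rcv_rtt_update(uint32_t est, uint32_t sample_us, int win_dep,
			       int rcvq_empty)
{
	uint32_t m = sample_us << 3;	/* scale the sample like the kernel does */

	if (est == 0 || m < est)	/* first sample, or a new minimum: adopt it */
		return m;
	if (win_dep)			/* larger window-dependent samples are ignored */
		return est;
	if (!rcvq_empty)		/* skip samples taken while data sits unread */
		return est;
	/* EWMA with gain 1/8: est += sample - est/8, expressed in the 8x domain */
	return est - (est >> 3) + sample_us;
}

int main(void)
{
	uint32_t est = 0;

	est = rcv_rtt_update(est, 40000, 1, 1);	/* first sample: 40 ms -> est = 320000 */
	est = rcv_rtt_update(est, 50000, 0, 1);	/* timestamp sample smooths the estimate */
	printf("rtt_us (scaled by 8): %u\n", est);
	return 0;
}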
@@ -712,7 +709,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 	tp->rcv_rtt_est.time = tp->tcp_mstamp;
 }

-static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta)
 {
 	u32 delta, delta_us;

@@ -722,7 +719,7 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)

 	if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
 		if (!delta)
-			delta = 1;
+			delta = min_delta;
 		delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 		return delta_us;
 	}
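With the new min_delta argument, a zero timestamp echo delta can either be rounded up to one tick or reported as 0 so the caller can discard it: the receive-side measurement below passes 0 (and now requires delta > 0), while the ACK RTT path in a later hunk keeps the old one-tick floor by passing 1. A standalone sketch of the conversion, assuming the usual TCP_TS_HZ of 1000 (1 ms ticks):

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000
#define TCP_TS_HZ	1000	/* assumed timestamp clock: 1 tick == 1 ms */

/* Model of tcp_rtt_tsopt_us() after the change: delta is rcv_tsval - rcv_tsecr. */
static int32_t rtt_tsopt_us(uint32_t delta, uint32_t min_delta)
{
	if (delta < INT32_MAX / (USEC_PER_SEC / TCP_TS_HZ)) {
		if (!delta)
			delta = min_delta;	/* 0 for the rcv path, 1 for the ACK path */
		return delta * (USEC_PER_SEC / TCP_TS_HZ);
	}
	return -1;
}

int main(void)
{
	/* Sub-millisecond RTT: the receive path (min_delta=0) returns 0 and the
	 * caller drops it, the ACK path (min_delta=1) still reports a 1 ms floor.
	 */
	printf("rcv path: %d us, ack path: %d us\n",
	       rtt_tsopt_us(0, 0), rtt_tsopt_us(0, 1));
	return 0;
}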
@@ -740,22 +737,47 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,

 	if (TCP_SKB_CB(skb)->end_seq -
 	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
-		s32 delta = tcp_rtt_tsopt_us(tp);
+		s32 delta = tcp_rtt_tsopt_us(tp, 0);

-		if (delta >= 0)
+		if (delta > 0)
 			tcp_rcv_rtt_update(tp, delta, 0);
 	}
 }

+static void tcp_rcvbuf_grow(struct sock *sk)
+{
+	const struct net *net = sock_net(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	int rcvwin, rcvbuf, cap;
+
+	if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
+	    (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+		return;
+
+	/* slow start: allow the sender to double its rate. */
+	rcvwin = tp->rcvq_space.space << 1;
+
+	if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
+		rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt;
+
+	cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+	rcvbuf = min_t(u32, tcp_space_from_win(sk, rcvwin), cap);
+	if (rcvbuf > sk->sk_rcvbuf) {
+		WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+		/* Make the window clamp follow along. */
+		WRITE_ONCE(tp->window_clamp,
+			   tcp_win_from_space(sk, rcvbuf));
+	}
+}
 /*
  * This function should be called every time data is copied to user space.
  * It calculates the appropriate TCP receive buffer space.
  */
 void tcp_rcv_space_adjust(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 copied;
-	int time;
+	int time, inq, copied;

 	trace_tcp_rcv_space_adjust(sk);

@@ -766,45 +788,18 @@ void tcp_rcv_space_adjust(struct sock *sk)

 	/* Number of bytes copied to user in last RTT */
 	copied = tp->copied_seq - tp->rcvq_space.seq;
+	/* Number of bytes in receive queue. */
+	inq = tp->rcv_nxt - tp->copied_seq;
+	copied -= inq;
 	if (copied <= tp->rcvq_space.space)
 		goto new_measure;

-	/* A bit of theory :
-	 * copied = bytes received in previous RTT, our base window
-	 * To cope with packet losses, we need a 2x factor
-	 * To cope with slow start, and sender growing its cwin by 100 %
-	 * every RTT, we need a 4x factor, because the ACK we are sending
-	 * now is for the next RTT, not the current one :
-	 * <prev RTT . ><current RTT .. ><next RTT .... >
-	 */
-
-	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
-	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-		u64 rcvwin, grow;
-		int rcvbuf;
-
-		/* minimal window to cope with packet losses, assuming
-		 * steady state. Add some cushion because of small variations.
-		 */
-		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
+	trace_tcp_rcvbuf_grow(sk, time);

-		/* Accommodate for sender rate increase (eg. slow start) */
-		grow = rcvwin * (copied - tp->rcvq_space.space);
-		do_div(grow, tp->rcvq_space.space);
-		rcvwin += (grow << 1);
-
-		rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
-			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
-		if (rcvbuf > sk->sk_rcvbuf) {
-			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
-
-			/* Make the window clamp follow along. */
-			WRITE_ONCE(tp->window_clamp,
-				   tcp_win_from_space(sk, rcvbuf));
-		}
-	}
 	tp->rcvq_space.space = copied;

+	tcp_rcvbuf_grow(sk);
+
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
 	tp->rcvq_space.time = tp->tcp_mstamp;
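Receive buffer autotuning is thus split in two: tcp_rcv_space_adjust() now discounts bytes still sitting unread in the receive queue before updating rcvq_space.space, while the actual growth lives in tcp_rcvbuf_grow(), which doubles the last measured per-RTT window, adds any out-of-order backlog, and clamps the result to tcp_rmem[2] (it is also invoked from tcp_data_queue_ofo() in a later hunk). A rough userspace model of the new sizing rule follows; the factor-of-two win-to-space conversion is only a stand-in for tcp_space_from_win(), whose real overhead accounting is not reproduced here:

#include <stdint.h>
#include <stdio.h>

/* Rough model of tcp_rcvbuf_grow(): double the per-RTT delivered window,
 * add any out-of-order backlog, translate to buffer bytes, clamp to the cap.
 */
static uint32_t rcvbuf_grow(uint32_t space, uint32_t ooo_bytes,
			    uint32_t cur_rcvbuf, uint32_t rmem_max)
{
	uint32_t rcvwin = (space << 1) + ooo_bytes;	/* let the sender double its rate */
	uint64_t rcvbuf = (uint64_t)rcvwin * 2;		/* assumed window-to-space overhead factor */

	if (rcvbuf > rmem_max)
		rcvbuf = rmem_max;
	return rcvbuf > cur_rcvbuf ? (uint32_t)rcvbuf : cur_rcvbuf;
}

int main(void)
{
	/* 1 MB delivered last RTT, 64 KB waiting out of order, 6 MB cap. */
	printf("new rcvbuf: %u bytes\n",
	       rcvbuf_grow(1 << 20, 64 << 10, 1 << 20, 6 << 20));
	return 0;
}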
@@ -3226,7 +3221,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 */
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
 	    tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
-		seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+		seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1);

 	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
 	if (seq_rtt_us < 0)
@@ -5173,6 +5168,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		skb_condense(skb);
 		skb_set_owner_r(skb, sk);
 	}
+	tcp_rcvbuf_grow(sk);
 }

 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
@@ -6873,6 +6869,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);

+		if (tp->rx_opt.tstamp_ok)
+			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
 		if (req) {
 			tcp_rcv_synrecv_state_fastopen(sk);
 		} else {
@@ -6898,9 +6897,6 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

-		if (tp->rx_opt.tstamp_ok)
-			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
 		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
 			tcp_update_pacing_rate(sk);

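The timestamp-option deduction from advmss is moved earlier in tcp_rcv_state_process(), ahead of the request-socket/Fast Open handling, so everything that runs later in the same pass already sees the 12-byte smaller MSS; the later copy of the same two lines is removed. A trivial standalone illustration of the adjustment (the 1460-byte starting MSS is only an example value):

#include <stdio.h>

#define TCPOLEN_TSTAMP_ALIGNED	12	/* 10-byte TS option padded to a 4-byte boundary */

int main(void)
{
	int advmss = 1460;		/* typical Ethernet-derived MSS, illustrative only */
	int tstamp_ok = 1;

	if (tstamp_ok)
		advmss -= TCPOLEN_TSTAMP_ALIGNED;
	printf("effective payload per segment with timestamps: %d bytes\n", advmss);
	return 0;
}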