2020-05-11 14:54:34 +01:00
// SPDX-License-Identifier: GPL-2.0
/* RTT/RTO calculation.
 *
 * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
 *
 * https://tools.ietf.org/html/rfc6298
 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
 */
# include <linux/net.h>
# include "ar-internal.h"
/* Upper bound applied to the computed RTO, in jiffies (120 * HZ). */
# define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
/* RTO, in jiffies, used for a peer before any RTT sample has been taken. */
# define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
/* The jiffies counter truncated to 32 bits. */
# define rxrpc_jiffies32 ((u32)jiffies) /* Counterpart of TCP's tcp_jiffies32 */
/*
 * Return the floor used when seeding the RTT variance from the first sample.
 *
 * The value is a fixed constant; @peer is accepted but not consulted.
 *
 * NOTE(review): the name says microseconds, which makes this 200us.  The TCP
 * code this was adapted from uses a 200ms minimum RTO - confirm that the
 * much smaller value is intended here.
 */
static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
{
	const u32 floor_us = 200;

	return floor_us;
}
static u32 __rxrpc_set_rto ( const struct rxrpc_peer * peer )
{
return _usecs_to_jiffies ( ( peer - > srtt_us > > 3 ) + peer - > rttvar_us ) ;
}
/*
 * Clamp an RTO value (in jiffies) so that it never exceeds RXRPC_RTO_MAX.
 * Values at or below the ceiling are returned unchanged.
 */
static u32 rxrpc_bound_rto(u32 rto)
{
	if (rto > RXRPC_RTO_MAX)
		return RXRPC_RTO_MAX;
	return rto;
}
/*
* Called to compute a smoothed rtt estimate . The data fed to this
* routine either comes from timestamps , or from segments that were
* known _not_ to have been retransmitted [ see Karn / Partridge
* Proceedings SIGCOMM 87 ] . The algorithm is from the SIGCOMM 88
* piece by Van Jacobson .
* NOTE : the next three routines used to be one big routine .
* To save cycles in the RFC 1323 implementation it was better to break
* it up into three procedures . - - erics
*/
static void rxrpc_rtt_estimator ( struct rxrpc_peer * peer , long sample_rtt_us )
{
long m = sample_rtt_us ; /* RTT */
u32 srtt = peer - > srtt_us ;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM ' 88. Note that rtt and mdev
* are scaled versions of rtt and mean deviation .
* This is designed to be as fast as possible
* m stands for " measurement " .
*
* On a 1990 paper the rto value is changed to :
* RTO = rtt + 4 * mdev
*
* Funny . This algorithm seems to be very broken .
* These formulae increase RTO , when it should be decreased , increase
* too slowly , when it should be increased quickly , decrease too quickly
* etc . I guess in BSD RTO takes ONE value , so that it is absolutely
* does not matter how to _calculate_ it . Seems , it was trap
* that VJ failed to avoid . 8 )
*/
if ( srtt ! = 0 ) {
m - = ( srtt > > 3 ) ; /* m is now error in rtt est */
srtt + = m ; /* rtt = 7/8 rtt + 1/8 new */
if ( m < 0 ) {
m = - m ; /* m is now abs(error) */
m - = ( peer - > mdev_us > > 2 ) ; /* similar update on mdev */
/* This is similar to one of Eifel findings.
* Eifel blocks mdev updates when rtt decreases .
* This solution is a bit different : we use finer gain
* for mdev in this case ( alpha * beta ) .
* Like Eifel it also prevents growth of rto ,
* but also it limits too fast rto decreases ,
* happening in pure Eifel .
*/
if ( m > 0 )
m > > = 3 ;
} else {
m - = ( peer - > mdev_us > > 2 ) ; /* similar update on mdev */
}
peer - > mdev_us + = m ; /* mdev = 3/4 mdev + 1/4 new */
if ( peer - > mdev_us > peer - > mdev_max_us ) {
peer - > mdev_max_us = peer - > mdev_us ;
if ( peer - > mdev_max_us > peer - > rttvar_us )
peer - > rttvar_us = peer - > mdev_max_us ;
}
} else {
/* no previous measure. */
srtt = m < < 3 ; /* take the measured time to be rtt */
peer - > mdev_us = m < < 1 ; /* make sure rto = 3*rtt */
peer - > rttvar_us = max ( peer - > mdev_us , rxrpc_rto_min_us ( peer ) ) ;
peer - > mdev_max_us = peer - > rttvar_us ;
}
peer - > srtt_us = max ( 1U , srtt ) ;
}
/*
* Calculate rto without backoff . This is the second half of Van Jacobson ' s
* routine referred to above .
*/
static void rxrpc_set_rto ( struct rxrpc_peer * peer )
{
u32 rto ;
/* 1. If rtt variance happened to be less 50msec, it is hallucination.
* It cannot be less due to utterly erratic ACK generation made
* at least by solaris and freebsd . " Erratic ACKs " has _nothing_
* to do with delayed acks , because at cwnd > 2 true delack timeout
* is invisible . Actually , Linux - 2.4 also generates erratic
* ACKs in some circumstances .
*/
rto = __rxrpc_set_rto ( peer ) ;
/* 2. Fixups made earlier cannot be right.
* If we do not estimate RTO correctly without them ,
* all the algo is pure shit and should be replaced
* with correct one . It is exactly , which we pretend to do .
*/
/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
* guarantees that rto is higher .
*/
peer - > rto_j = rxrpc_bound_rto ( rto ) ;
}
static void rxrpc_ack_update_rtt ( struct rxrpc_peer * peer , long rtt_us )
{
if ( rtt_us < 0 )
return ;
//rxrpc_update_rtt_min(peer, rtt_us);
rxrpc_rtt_estimator ( peer , rtt_us ) ;
rxrpc_set_rto ( peer ) ;
/* RFC6298: only reset backoff on valid RTT measurement. */
peer - > backoff = 0 ;
}
/*
 * Add RTT information to cache.  This is called in softirq mode and has
 * exclusive access to the peer RTT data.
 *
 * @call:        call on which the sample was taken; supplies the peer
 * @why:         trace classification of the sample
 * @rtt_slot:    slot of the outgoing-packet record that matched the response;
 *               only forwarded to the tracepoint here
 * @send_serial: serial number of the packet that solicited the response
 * @resp_serial: serial number of the responding ACK
 * @send_time:   time the soliciting packet was transmitted
 * @resp_time:   time the response was received
 *
 * Background (from the change that introduced @rtt_slot): a peer may
 * interpose other ACKs before the REQUESTED ACK, hard-acking the DATA packet
 * so that it is discarded from the Tx ring and its timestamp is lost.  The
 * details of REQUEST_ACK-flagged DATA and PING ACK transmissions are
 * therefore cached in a small buffer in the rxrpc_call struct and matched
 * against incoming ACKs, rather than being looked up in the Tx ring.
 *
 * A response that appears to precede its request (negative RTT) is dropped
 * without touching the peer state or emitting a trace.
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
			int rtt_slot,
			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
			ktime_t send_time, ktime_t resp_time)
{
	struct rxrpc_peer *peer = call->peer;
	s64 rtt_us;

	rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
	if (rtt_us < 0)
		return;

	spin_lock(&peer->rtt_input_lock);
	rxrpc_ack_update_rtt(peer, rtt_us);
	/* The sample counter saturates at 3. */
	if (peer->rtt_count < 3)
		peer->rtt_count++;
	spin_unlock(&peer->rtt_input_lock);

	/* srtt_us is stored scaled by 8; report the unscaled value. */
	trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
			   peer->srtt_us >> 3, peer->rto_j);
}
/*
 * Get the retransmission timeout to set in jiffies, backing it off each time
 * we retransmit.
 *
 * The timeout is the peer's RTO doubled once per recorded backoff step.  When
 * @retrans is true and doubling again would still fit within RXRPC_RTO_MAX,
 * the backoff step is advanced for the next caller.  The result is never
 * less than one jiffy.
 */
unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
{
	u8 shift = READ_ONCE(peer->backoff);
	u64 timeout = peer->rto_j;

	timeout <<= shift;

	/* Only step the backoff if the next doubling stays within bounds. */
	if (retrans && timeout * 2 <= RXRPC_RTO_MAX)
		WRITE_ONCE(peer->backoff, shift + 1);

	return timeout < 1 ? 1 : timeout;
}
void rxrpc_peer_init_rtt ( struct rxrpc_peer * peer )
{
peer - > rto_j = RXRPC_TIMEOUT_INIT ;
peer - > mdev_us = jiffies_to_usecs ( RXRPC_TIMEOUT_INIT ) ;
peer - > backoff = 0 ;
//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
}