Merge branch 'tcp-more-perns-sysctls'
Eric Dumazet says:

====================
tcp: move 12 sysctls to namespaces

Ideally all TCP sysctls should be per netns.
This patch series takes care of 12 sysctls.

Remains the ones that need discussion :

sysctl_tcp_mem, sysctl_tcp_rmem, sysctl_tcp_wmem, and sysctl_tcp_max_orphans
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
871da0a761
@ -142,6 +142,18 @@ struct netns_ipv4 {
|
||||
int sysctl_tcp_app_win;
|
||||
int sysctl_tcp_adv_win_scale;
|
||||
int sysctl_tcp_frto;
|
||||
int sysctl_tcp_nometrics_save;
|
||||
int sysctl_tcp_moderate_rcvbuf;
|
||||
int sysctl_tcp_tso_win_divisor;
|
||||
int sysctl_tcp_workaround_signed_windows;
|
||||
int sysctl_tcp_limit_output_bytes;
|
||||
int sysctl_tcp_challenge_ack_limit;
|
||||
int sysctl_tcp_min_tso_segs;
|
||||
int sysctl_tcp_min_rtt_wlen;
|
||||
int sysctl_tcp_autocorking;
|
||||
int sysctl_tcp_invalid_ratelimit;
|
||||
int sysctl_tcp_pacing_ss_ratio;
|
||||
int sysctl_tcp_pacing_ca_ratio;
|
||||
struct inet_timewait_death_row tcp_death_row;
|
||||
int sysctl_max_syn_backlog;
|
||||
int sysctl_tcp_fastopen;
|
||||
|
@ -247,22 +247,9 @@ extern int sysctl_tcp_max_orphans;
|
||||
extern long sysctl_tcp_mem[3];
|
||||
extern int sysctl_tcp_wmem[3];
|
||||
extern int sysctl_tcp_rmem[3];
|
||||
extern int sysctl_tcp_nometrics_save;
|
||||
extern int sysctl_tcp_moderate_rcvbuf;
|
||||
extern int sysctl_tcp_tso_win_divisor;
|
||||
extern int sysctl_tcp_workaround_signed_windows;
|
||||
|
||||
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
|
||||
|
||||
extern int sysctl_tcp_limit_output_bytes;
|
||||
extern int sysctl_tcp_challenge_ack_limit;
|
||||
extern int sysctl_tcp_min_tso_segs;
|
||||
extern int sysctl_tcp_min_rtt_wlen;
|
||||
extern int sysctl_tcp_autocorking;
|
||||
extern int sysctl_tcp_invalid_ratelimit;
|
||||
extern int sysctl_tcp_pacing_ss_ratio;
|
||||
extern int sysctl_tcp_pacing_ca_ratio;
|
||||
|
||||
extern atomic_long_t tcp_memory_allocated;
|
||||
extern struct percpu_counter tcp_sockets_allocated;
|
||||
extern unsigned long tcp_memory_pressure;
|
||||
@ -1305,7 +1292,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
|
||||
}
|
||||
|
||||
/* Determine a window scaling and initial window to offer. */
|
||||
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
|
||||
void tcp_select_initial_window(const struct sock *sk, int __space,
|
||||
__u32 mss, __u32 *rcv_wnd,
|
||||
__u32 *window_clamp, int wscale_ok,
|
||||
__u8 *rcv_wscale, __u32 init_rcv_wnd);
|
||||
|
||||
|
@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
||||
/* Try to redo what tcp_v4_send_synack did. */
|
||||
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
|
||||
|
||||
tcp_select_initial_window(tcp_full_space(sk), req->mss,
|
||||
tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
|
||||
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
||||
ireq->wscale_ok, &rcv_wscale,
|
||||
dst_metric(&rt->dst, RTAX_INITRWND));
|
||||
|
@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &one,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_min_rtt_wlen",
|
||||
.data = &sysctl_tcp_min_rtt_wlen,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_low_latency",
|
||||
.data = &sysctl_tcp_low_latency,
|
||||
@ -451,54 +444,12 @@ static struct ctl_table ipv4_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_no_metrics_save",
|
||||
.data = &sysctl_tcp_nometrics_save,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_moderate_rcvbuf",
|
||||
.data = &sysctl_tcp_moderate_rcvbuf,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_tso_win_divisor",
|
||||
.data = &sysctl_tcp_tso_win_divisor,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_congestion_control",
|
||||
.mode = 0644,
|
||||
.maxlen = TCP_CA_NAME_MAX,
|
||||
.proc_handler = proc_tcp_congestion_control,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_workaround_signed_windows",
|
||||
.data = &sysctl_tcp_workaround_signed_windows,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_limit_output_bytes",
|
||||
.data = &sysctl_tcp_limit_output_bytes,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_challenge_ack_limit",
|
||||
.data = &sysctl_tcp_challenge_ack_limit,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
#ifdef CONFIG_NETLABEL
|
||||
{
|
||||
.procname = "cipso_cache_enable",
|
||||
@ -541,49 +492,6 @@ static struct ctl_table ipv4_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_allowed_congestion_control,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_min_tso_segs",
|
||||
.data = &sysctl_tcp_min_tso_segs,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &one,
|
||||
.extra2 = &gso_max_segs,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_pacing_ss_ratio",
|
||||
.data = &sysctl_tcp_pacing_ss_ratio,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &thousand,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_pacing_ca_ratio",
|
||||
.data = &sysctl_tcp_pacing_ca_ratio,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &thousand,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_autocorking",
|
||||
.data = &sysctl_tcp_autocorking,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_invalid_ratelimit",
|
||||
.data = &sysctl_tcp_invalid_ratelimit,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_ms_jiffies,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_available_ulp",
|
||||
.maxlen = TCP_ULP_BUF_MAX,
|
||||
@ -1145,6 +1053,98 @@ static struct ctl_table ipv4_net_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_no_metrics_save",
|
||||
.data = &init_net.ipv4.sysctl_tcp_nometrics_save,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_moderate_rcvbuf",
|
||||
.data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_tso_win_divisor",
|
||||
.data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_workaround_signed_windows",
|
||||
.data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_limit_output_bytes",
|
||||
.data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_challenge_ack_limit",
|
||||
.data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_min_tso_segs",
|
||||
.data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &one,
|
||||
.extra2 = &gso_max_segs,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_min_rtt_wlen",
|
||||
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_autocorking",
|
||||
.data = &init_net.ipv4.sysctl_tcp_autocorking,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_invalid_ratelimit",
|
||||
.data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_ms_jiffies,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_pacing_ss_ratio",
|
||||
.data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &thousand,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_pacing_ca_ratio",
|
||||
.data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &thousand,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
|
@ -285,10 +285,6 @@
|
||||
|
||||
#include <trace/events/tcp.h>
|
||||
|
||||
int sysctl_tcp_min_tso_segs __read_mostly = 2;
|
||||
|
||||
int sysctl_tcp_autocorking __read_mostly = 1;
|
||||
|
||||
struct percpu_counter tcp_orphan_count;
|
||||
EXPORT_SYMBOL_GPL(tcp_orphan_count);
|
||||
|
||||
@ -699,7 +695,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
|
||||
int size_goal)
|
||||
{
|
||||
return skb->len < size_goal &&
|
||||
sysctl_tcp_autocorking &&
|
||||
sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
|
||||
skb != tcp_write_queue_head(sk) &&
|
||||
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
|
||||
}
|
||||
|
@ -79,13 +79,7 @@
|
||||
#include <linux/unaligned/access_ok.h>
|
||||
#include <linux/static_key.h>
|
||||
|
||||
/* rfc5961 challenge ack rate limiting */
|
||||
int sysctl_tcp_challenge_ack_limit = 1000;
|
||||
|
||||
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
|
||||
int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
|
||||
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
|
||||
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
|
||||
|
||||
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
|
||||
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
|
||||
@ -411,7 +405,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
|
||||
/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
|
||||
* Allow enough cushion so that sender is not limited by our window
|
||||
*/
|
||||
if (sysctl_tcp_moderate_rcvbuf)
|
||||
if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
|
||||
rcvmem <<= 2;
|
||||
|
||||
if (sk->sk_rcvbuf < rcvmem)
|
||||
@ -602,7 +596,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
|
||||
* <prev RTT . ><current RTT .. ><next RTT .... >
|
||||
*/
|
||||
|
||||
if (sysctl_tcp_moderate_rcvbuf &&
|
||||
if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
|
||||
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
|
||||
int rcvwin, rcvmem, rcvbuf;
|
||||
|
||||
@ -773,15 +767,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
|
||||
tp->srtt_us = max(1U, srtt);
|
||||
}
|
||||
|
||||
/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
|
||||
* Note: TCP stack does not yet implement pacing.
|
||||
* FQ packet scheduler can be used to implement cheap but effective
|
||||
* TCP pacing, to smooth the burst on large writes when packets
|
||||
* in flight is significantly lower than cwnd (or rwin)
|
||||
*/
|
||||
int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
|
||||
int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
|
||||
|
||||
static void tcp_update_pacing_rate(struct sock *sk)
|
||||
{
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
@ -799,9 +784,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
|
||||
* end of slow start and should slow down.
|
||||
*/
|
||||
if (tp->snd_cwnd < tp->snd_ssthresh / 2)
|
||||
rate *= sysctl_tcp_pacing_ss_ratio;
|
||||
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
|
||||
else
|
||||
rate *= sysctl_tcp_pacing_ca_ratio;
|
||||
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
|
||||
|
||||
rate *= max(tp->snd_cwnd, tp->packets_out);
|
||||
|
||||
@ -2919,8 +2904,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
|
||||
|
||||
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
|
||||
{
|
||||
u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
|
||||
|
||||
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
|
||||
rtt_us ? : jiffies_to_usecs(1));
|
||||
@ -3408,7 +3393,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
|
||||
if (*last_oow_ack_time) {
|
||||
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
|
||||
|
||||
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
|
||||
if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
|
||||
NET_INC_STATS(net, mib_idx);
|
||||
return true; /* rate-limited: don't send yet! */
|
||||
}
|
||||
@ -3444,10 +3429,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
|
||||
static u32 challenge_timestamp;
|
||||
static unsigned int challenge_count;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct net *net = sock_net(sk);
|
||||
u32 count, now;
|
||||
|
||||
/* First check our per-socket dupack rate limit. */
|
||||
if (__tcp_oow_rate_limited(sock_net(sk),
|
||||
if (__tcp_oow_rate_limited(net,
|
||||
LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
|
||||
&tp->last_oow_ack_time))
|
||||
return;
|
||||
@ -3455,16 +3441,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
|
||||
/* Then check host-wide RFC 5961 rate limit. */
|
||||
now = jiffies / HZ;
|
||||
if (now != challenge_timestamp) {
|
||||
u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
|
||||
u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
|
||||
u32 half = (ack_limit + 1) >> 1;
|
||||
|
||||
challenge_timestamp = now;
|
||||
WRITE_ONCE(challenge_count, half +
|
||||
prandom_u32_max(sysctl_tcp_challenge_ack_limit));
|
||||
WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
|
||||
}
|
||||
count = READ_ONCE(challenge_count);
|
||||
if (count > 0) {
|
||||
WRITE_ONCE(challenge_count, count - 1);
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
|
||||
NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
|
||||
tcp_send_ack(sk);
|
||||
}
|
||||
}
|
||||
|
@ -2493,6 +2493,22 @@ static int __net_init tcp_sk_init(struct net *net)
|
||||
net->ipv4.sysctl_tcp_app_win = 31;
|
||||
net->ipv4.sysctl_tcp_adv_win_scale = 1;
|
||||
net->ipv4.sysctl_tcp_frto = 2;
|
||||
net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
|
||||
/* This limits the percentage of the congestion window which we
|
||||
* will allow a single TSO frame to consume. Building TSO frames
|
||||
* which are too large can cause TCP streams to be bursty.
|
||||
*/
|
||||
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
|
||||
/* Default TSQ limit of four TSO segments */
|
||||
net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
|
||||
/* rfc5961 challenge ack rate limiting */
|
||||
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
|
||||
net->ipv4.sysctl_tcp_min_tso_segs = 2;
|
||||
net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
|
||||
net->ipv4.sysctl_tcp_autocorking = 1;
|
||||
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
|
||||
net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
|
||||
net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
|
||||
|
||||
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
|
||||
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
|
||||
|
@ -20,8 +20,6 @@
|
||||
#include <net/tcp.h>
|
||||
#include <net/genetlink.h>
|
||||
|
||||
int sysctl_tcp_nometrics_save __read_mostly;
|
||||
|
||||
static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
|
||||
const struct inetpeer_addr *daddr,
|
||||
struct net *net, unsigned int hash);
|
||||
@ -330,7 +328,7 @@ void tcp_update_metrics(struct sock *sk)
|
||||
int m;
|
||||
|
||||
sk_dst_confirm(sk);
|
||||
if (sysctl_tcp_nometrics_save || !dst)
|
||||
if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
|
@ -369,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
|
||||
full_space = rcv_wnd * mss;
|
||||
|
||||
/* tcp_full_space because it is guaranteed to be the first packet */
|
||||
tcp_select_initial_window(full_space,
|
||||
tcp_select_initial_window(sk_listener, full_space,
|
||||
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
|
||||
&req->rsk_rcv_wnd,
|
||||
&req->rsk_window_clamp,
|
||||
|
@ -45,20 +45,6 @@
|
||||
|
||||
#include <trace/events/tcp.h>
|
||||
|
||||
/* People can turn this on to work with those rare, broken TCPs that
|
||||
* interpret the window field as a signed quantity.
|
||||
*/
|
||||
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
|
||||
|
||||
/* Default TSQ limit of four TSO segments */
|
||||
int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
|
||||
|
||||
/* This limits the percentage of the congestion window which we
|
||||
* will allow a single TSO frame to consume. Building TSO frames
|
||||
* which are too large can cause TCP streams to be bursty.
|
||||
*/
|
||||
int sysctl_tcp_tso_win_divisor __read_mostly = 3;
|
||||
|
||||
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||
int push_one, gfp_t gfp);
|
||||
|
||||
@ -202,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
|
||||
* be a multiple of mss if possible. We assume here that mss >= 1.
|
||||
* This MUST be enforced by all callers.
|
||||
*/
|
||||
void tcp_select_initial_window(int __space, __u32 mss,
|
||||
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
|
||||
__u32 *rcv_wnd, __u32 *window_clamp,
|
||||
int wscale_ok, __u8 *rcv_wscale,
|
||||
__u32 init_rcv_wnd)
|
||||
@ -226,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
|
||||
* which we interpret as a sign the remote TCP is not
|
||||
* misinterpreting the window field as a signed quantity.
|
||||
*/
|
||||
if (sysctl_tcp_workaround_signed_windows)
|
||||
if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
|
||||
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
|
||||
else
|
||||
(*rcv_wnd) = space;
|
||||
@ -286,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
|
||||
/* Make sure we do not exceed the maximum possible
|
||||
* scaled window.
|
||||
*/
|
||||
if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
|
||||
if (!tp->rx_opt.rcv_wscale &&
|
||||
sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
|
||||
new_win = min(new_win, MAX_TCP_WINDOW);
|
||||
else
|
||||
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
|
||||
@ -1771,7 +1758,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
|
||||
|
||||
return tso_segs ? :
|
||||
tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
|
||||
tcp_tso_autosize(sk, mss_now,
|
||||
sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
}
|
||||
|
||||
/* Returns the portion of skb which can be sent right away */
|
||||
@ -1988,7 +1976,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
|
||||
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
|
||||
goto send_now;
|
||||
|
||||
win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
|
||||
win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
|
||||
if (win_divisor) {
|
||||
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
|
||||
|
||||
@ -2225,7 +2213,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
|
||||
unsigned int limit;
|
||||
|
||||
limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
|
||||
limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
|
||||
limit = min_t(u32, limit,
|
||||
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
|
||||
limit <<= factor;
|
||||
|
||||
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
|
||||
@ -3355,7 +3344,7 @@ static void tcp_connect_init(struct sock *sk)
|
||||
if (rcv_wnd == 0)
|
||||
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
|
||||
|
||||
tcp_select_initial_window(tcp_full_space(sk),
|
||||
tcp_select_initial_window(sk, tcp_full_space(sk),
|
||||
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
|
||||
&tp->rcv_wnd,
|
||||
&tp->window_clamp,
|
||||
|
@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
|
||||
}
|
||||
|
||||
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
|
||||
tcp_select_initial_window(tcp_full_space(sk), req->mss,
|
||||
tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
|
||||
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
||||
ireq->wscale_ok, &rcv_wscale,
|
||||
dst_metric(dst, RTAX_INITRWND));
|
||||
|
Loading…
x
Reference in New Issue
Block a user