Merge branch 'tcp-dccp-refine-source-port-selection'
Eric Dumazet says:

====================
tcp/dccp: refine source port selection

This patch series leverages IP_LOCAL_PORT_RANGE option
to no longer favor even source port selection at connect() time.

This should lower time taken by connect() for hosts having
many active connections to the same destination.
====================

Link: https://lore.kernel.org/r/20231214192939.1962891-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
358105ab92
@ -356,7 +356,7 @@ static inline void inet_get_local_port_range(const struct net *net, int *low, in
|
||||
*low = range & 0xffff;
|
||||
*high = range >> 16;
|
||||
}
|
||||
void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
|
||||
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
|
||||
|
@ -117,16 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk)
|
||||
return !sk->sk_rcv_saddr;
|
||||
}
|
||||
|
||||
void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
|
||||
/**
|
||||
* inet_sk_get_local_port_range - fetch ephemeral ports range
|
||||
* @sk: socket
|
||||
* @low: pointer to low port
|
||||
* @high: pointer to high port
|
||||
*
|
||||
* Fetch netns port range (/proc/sys/net/ipv4/ip_local_port_range)
|
||||
* Range can be overridden if socket got IP_LOCAL_PORT_RANGE option.
|
||||
* Returns true if IP_LOCAL_PORT_RANGE was set on this socket.
|
||||
*/
|
||||
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
|
||||
{
|
||||
const struct inet_sock *inet = inet_sk(sk);
|
||||
const struct net *net = sock_net(sk);
|
||||
int lo, hi, sk_lo, sk_hi;
|
||||
bool local_range = false;
|
||||
u32 sk_range;
|
||||
|
||||
inet_get_local_port_range(net, &lo, &hi);
|
||||
inet_get_local_port_range(sock_net(sk), &lo, &hi);
|
||||
|
||||
sk_range = READ_ONCE(inet->local_port_range);
|
||||
sk_range = READ_ONCE(inet_sk(sk)->local_port_range);
|
||||
if (unlikely(sk_range)) {
|
||||
sk_lo = sk_range & 0xffff;
|
||||
sk_hi = sk_range >> 16;
|
||||
@ -135,10 +144,12 @@ void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
|
||||
lo = sk_lo;
|
||||
if (lo <= sk_hi && sk_hi <= hi)
|
||||
hi = sk_hi;
|
||||
local_range = true;
|
||||
}
|
||||
|
||||
*low = lo;
|
||||
*high = hi;
|
||||
return local_range;
|
||||
}
|
||||
EXPORT_SYMBOL(inet_sk_get_local_port_range);
|
||||
|
||||
|
@ -1012,7 +1012,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
|
||||
bool tb_created = false;
|
||||
u32 remaining, offset;
|
||||
int ret, i, low, high;
|
||||
int l3mdev;
|
||||
bool local_ports;
|
||||
int step, l3mdev;
|
||||
u32 index;
|
||||
|
||||
if (port) {
|
||||
@ -1024,10 +1025,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
|
||||
|
||||
l3mdev = inet_sk_bound_l3mdev(sk);
|
||||
|
||||
inet_sk_get_local_port_range(sk, &low, &high);
|
||||
local_ports = inet_sk_get_local_port_range(sk, &low, &high);
|
||||
step = local_ports ? 1 : 2;
|
||||
|
||||
high++; /* [32768, 60999] -> [32768, 61000[ */
|
||||
remaining = high - low;
|
||||
if (likely(remaining > 1))
|
||||
if (!local_ports && remaining > 1)
|
||||
remaining &= ~1U;
|
||||
|
||||
get_random_sleepable_once(table_perturb,
|
||||
@ -1040,10 +1043,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
|
||||
/* In first pass we try ports of @low parity.
|
||||
* inet_csk_get_port() does the opposite choice.
|
||||
*/
|
||||
offset &= ~1U;
|
||||
if (!local_ports)
|
||||
offset &= ~1U;
|
||||
other_parity_scan:
|
||||
port = low + offset;
|
||||
for (i = 0; i < remaining; i += 2, port += 2) {
|
||||
for (i = 0; i < remaining; i += step, port += step) {
|
||||
if (unlikely(port >= high))
|
||||
port -= remaining;
|
||||
if (inet_is_local_reserved_port(net, port))
|
||||
@ -1083,10 +1087,11 @@ next_port:
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
offset++;
|
||||
if ((offset & 1) && remaining > 1)
|
||||
goto other_parity_scan;
|
||||
|
||||
if (!local_ports) {
|
||||
offset++;
|
||||
if ((offset & 1) && remaining > 1)
|
||||
goto other_parity_scan;
|
||||
}
|
||||
return -EADDRNOTAVAIL;
|
||||
|
||||
ok:
|
||||
@ -1109,8 +1114,8 @@ ok:
|
||||
* on low contention the randomness is maximal and on high contention
|
||||
* it may be inexistent.
|
||||
*/
|
||||
i = max_t(int, i, get_random_u32_below(8) * 2);
|
||||
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
|
||||
i = max_t(int, i, get_random_u32_below(8) * step);
|
||||
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step);
|
||||
|
||||
/* Head lock still held and bh's disabled */
|
||||
inet_bind_hash(sk, tb, tb2, port);
|
||||
|
Loading…
x
Reference in New Issue
Block a user