Merge branch 'udp-false-sharing'
Paolo Abeni says: ==================== udp: avoid false sharing on receive Under high UDP load, the BH processing and the user-space receiver can run on different cores. The UDP implementation makes a considerable effort to avoid false sharing in the receive path, but recent changes to the struct sock layout moved the sk_forward_alloc and the sk_rcvbuf fields onto the same cacheline: /* --- cacheline 4 boundary (256 bytes) --- */ struct sk_buff * tail; } sk_backlog; int sk_forward_alloc; unsigned int sk_reserved_mem; unsigned int sk_ll_usec; unsigned int sk_napi_id; int sk_rcvbuf; sk_forward_alloc is updated by the BH, while sk_rcvbuf is accessed by udp_recvmsg(), causing false sharing. A possible solution would be to re-order the struct sock fields to avoid the false sharing. Such a change is subject to being invalidated by future changes and could have negative side effects on other workloads. Instead, this series uses a different approach, touching only the UDP socket layout. The first patch generalizes the custom setsockopt infrastructure, to allow UDP to track the buffer size, and the second patch addresses the issue, copying the relevant buffer information into an already hot cacheline. Overall the above gives a 10% peak throughput increase under UDP flood. v1 -> v2: - introduce and use a common helper to initialize the UDP v4/v6 sockets (Kuniyuki) ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
b29e0dece4
@ -41,6 +41,7 @@ struct net;
|
||||
#define SOCK_NOSPACE 2
|
||||
#define SOCK_PASSCRED 3
|
||||
#define SOCK_PASSSEC 4
|
||||
#define SOCK_CUSTOM_SOCKOPT 5
|
||||
|
||||
#ifndef ARCH_HAS_SOCKET_TYPES
|
||||
/**
|
||||
|
@ -87,6 +87,9 @@ struct udp_sock {
|
||||
|
||||
/* This field is dirtied by udp_recvmsg() */
|
||||
int forward_deficit;
|
||||
|
||||
/* This field follows the rcvbuf value, and is touched by udp_recvmsg() */
|
||||
int forward_threshold;
|
||||
};
|
||||
|
||||
#define UDP_MAX_SEGMENTS (1 << 6UL)
|
||||
|
@ -174,6 +174,15 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
|
||||
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
|
||||
netdev_features_t features, bool is_ipv6);
|
||||
|
||||
/*
 * udp_lib_init_sock - common initialisation for a UDP socket.
 * @sk: the socket being initialised; accessed as a struct udp_sock via
 *      udp_sk().
 *
 * Called from both udp_init_sock() and udpv6_init_sock(). It prepares the
 * reader queue, seeds the per-UDP-socket copy of the forward-release
 * threshold, and flags the owning struct socket as using a custom
 * setsockopt path.
 */
static inline void udp_lib_init_sock(struct sock *sk)
|
||||
{
|
||||
struct udp_sock *up = udp_sk(sk);
|
||||
|
||||
skb_queue_head_init(&up->reader_queue);
|
||||
/*
 * Keep a copy of sk_rcvbuf / 4 inside struct udp_sock; udp_rmem_release()
 * compares against this field, and udp_lib_setsockopt() refreshes it on
 * SO_RCVBUF / SO_RCVBUFFORCE.
 */
up->forward_threshold = sk->sk_rcvbuf >> 2;
|
||||
/*
 * sock_use_custom_sol_socket() tests this bit, so setting it is what lets
 * the UDP setsockopt handlers see SOL_SOCKET options.
 * NOTE(review): sk->sk_socket is dereferenced unconditionally here --
 * confirm it is always non-NULL for every caller of this helper.
 */
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
|
||||
}
|
||||
|
||||
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
|
||||
static inline int udp_lib_hash(struct sock *sk)
|
||||
{
|
||||
|
@ -1448,7 +1448,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
|
||||
if (likely(partial)) {
|
||||
up->forward_deficit += size;
|
||||
size = up->forward_deficit;
|
||||
if (size < (sk->sk_rcvbuf >> 2) &&
|
||||
if (size < READ_ONCE(up->forward_threshold) &&
|
||||
!skb_queue_empty(&up->reader_queue))
|
||||
return;
|
||||
} else {
|
||||
@ -1622,7 +1622,7 @@ static void udp_destruct_sock(struct sock *sk)
|
||||
|
||||
int udp_init_sock(struct sock *sk)
|
||||
{
|
||||
skb_queue_head_init(&udp_sk(sk)->reader_queue);
|
||||
udp_lib_init_sock(sk);
|
||||
sk->sk_destruct = udp_destruct_sock;
|
||||
return 0;
|
||||
}
|
||||
@ -2671,6 +2671,18 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
|
||||
int err = 0;
|
||||
int is_udplite = IS_UDPLITE(sk);
|
||||
|
||||
if (level == SOL_SOCKET) {
|
||||
err = sk_setsockopt(sk, level, optname, optval, optlen);
|
||||
|
||||
if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) {
|
||||
sockopt_lock_sock(sk);
|
||||
/* paired with READ_ONCE in udp_rmem_release() */
|
||||
WRITE_ONCE(up->forward_threshold, sk->sk_rcvbuf >> 2);
|
||||
sockopt_release_sock(sk);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
if (optlen < sizeof(int))
|
||||
return -EINVAL;
|
||||
|
||||
@ -2784,7 +2796,7 @@ EXPORT_SYMBOL(udp_lib_setsockopt);
|
||||
int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
|
||||
unsigned int optlen)
|
||||
{
|
||||
if (level == SOL_UDP || level == SOL_UDPLITE)
|
||||
if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
|
||||
return udp_lib_setsockopt(sk, level, optname,
|
||||
optval, optlen,
|
||||
udp_push_pending_frames);
|
||||
|
@ -64,7 +64,7 @@ static void udpv6_destruct_sock(struct sock *sk)
|
||||
|
||||
int udpv6_init_sock(struct sock *sk)
|
||||
{
|
||||
skb_queue_head_init(&udp_sk(sk)->reader_queue);
|
||||
udp_lib_init_sock(sk);
|
||||
sk->sk_destruct = udpv6_destruct_sock;
|
||||
return 0;
|
||||
}
|
||||
@ -1669,7 +1669,7 @@ void udpv6_destroy_sock(struct sock *sk)
|
||||
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
|
||||
unsigned int optlen)
|
||||
{
|
||||
if (level == SOL_UDP || level == SOL_UDPLITE)
|
||||
if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
|
||||
return udp_lib_setsockopt(sk, level, optname,
|
||||
optval, optlen,
|
||||
udp_v6_push_pending_frames);
|
||||
|
@ -2708,6 +2708,8 @@ static int mptcp_init_sock(struct sock *sk)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
|
||||
|
||||
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
|
||||
* propagate the correct value
|
||||
*/
|
||||
@ -3684,6 +3686,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *newsk = newsock->sk;
|
||||
|
||||
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
|
||||
|
||||
lock_sock(newsk);
|
||||
|
||||
/* PM/worker can now acquire the first subflow socket
|
||||
|
@ -2199,13 +2199,7 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
|
||||
|
||||
static bool sock_use_custom_sol_socket(const struct socket *sock)
|
||||
{
|
||||
const struct sock *sk = sock->sk;
|
||||
|
||||
/* Use sock->ops->setsockopt() for MPTCP */
|
||||
return IS_ENABLED(CONFIG_MPTCP) &&
|
||||
sk->sk_protocol == IPPROTO_MPTCP &&
|
||||
sk->sk_type == SOCK_STREAM &&
|
||||
(sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
|
||||
return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user