From 8f279fb00bb29def9ac79e28c5d6d8e07d21f3fb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:56 +0100 Subject: [PATCH 1/4] udp: advertise ipv6 udp support for msghdr::ubuf_info Mark udp ipv6 as supporting msghdr::ubuf_info. In the original commit SOCK_SUPPORT_ZC was supposed to be set by a udp_init_sock() call from udp6_init_sock(), but d38afeec26ed4 ("tcp/udp: Call inet6_destroy_sock() in IPv6 ...") removed it and so ipv6 udp misses the flag. Cc: # 6.0 Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 129ec5a9b0eb..bc65e5b7195b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -66,6 +66,7 @@ int udpv6_init_sock(struct sock *sk) { skb_queue_head_init(&udp_sk(sk)->reader_queue); sk->sk_destruct = udpv6_destruct_sock; + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } From fee9ac06647e59a69fb7aec58f25267c134264b4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:57 +0100 Subject: [PATCH 2/4] net: remove SOCK_SUPPORT_ZC from sockmap sockmap replaces ->sk_prot with its own callbacks, we should remove SOCK_SUPPORT_ZC as the new proto doesn't support msghdr::ubuf_info. Cc: # 6.0 Reported-by: Jakub Kicinski Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 +++++++ net/ipv4/tcp_bpf.c | 4 ++-- net/ipv4/udp_bpf.c | 4 ++-- net/unix/unix_bpf.c | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 22f8bab583dd..5db02546941c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1889,6 +1889,13 @@ void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); void sk_send_sigurg(struct sock *sk); +static inline void sock_replace_proto(struct sock *sk, struct proto *proto) +{ + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + WRITE_ONCE(sk->sk_prot, proto); +} + struct sockcm_cookie { u64 transmit_time; u32 mark; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index a1626afe87a1..c501c329b1db 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -607,7 +607,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) } else { sk->sk_write_space = psock->saved_write_space; /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); } return 0; } @@ -620,7 +620,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) } /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); + sock_replace_proto(sk, &tcp_bpf_prots[family][config]); return 0; } EXPORT_SYMBOL_GPL(tcp_bpf_update_proto); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index ff15918b7bdc..e5dc91d0e079 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -141,14 +141,14 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } if (sk->sk_family == AF_INET6) udp_bpf_check_v6_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]); + sock_replace_proto(sk, &udp_bpf_prots[family]); return 0; } EXPORT_SYMBOL_GPL(udp_bpf_update_proto); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 7cf14c6b1725..e9bf15513961 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -145,12 +145,12 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } unix_dgram_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot); + sock_replace_proto(sk, &unix_dgram_bpf_prot); return 0; } @@ -158,12 +158,12 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r { if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } unix_stream_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot); + sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; } From e276d62dcfdee6582486e8b8344dd869518e14be Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:58 +0100 Subject: [PATCH 3/4] net/ulp: remove SOCK_SUPPORT_ZC from tls sockets Remove SOCK_SUPPORT_ZC when we're setting ulp as it might not support msghdr::ubuf_info, e.g. like TLS replacing ->sk_prot with a new set of handlers. Cc: # 6.0 Reported-by: Jakub Kicinski Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ulp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 7c27aa629af1..9ae50b1bd844 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -136,6 +136,9 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (icsk->icsk_ulp_ops) goto out_err; + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + err = ulp_ops->init(sk); if (err) goto out_err; From 71b7786ea478f3c4611deff4d2b9676b0c17c56b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 27 Oct 2022 00:25:59 +0100 Subject: [PATCH 4/4] net: also flag accepted sockets supporting msghdr originated zerocopy Without this only the client initiated tcp sockets have SOCK_SUPPORT_ZC. The listening socket on the server also has it, but the accepted connections didn't, which meant IORING_OP_SEND[MSG]_ZC will always fails with -EOPNOTSUPP. Fixes: e993ffe3da4b ("net: flag sockets supporting msghdr originated zerocopy") Cc: # 6.0 CC: Jens Axboe Link: https://lore.kernel.org/io-uring/20221024141503.22b4e251@kernel.org/T/#m38aa19b0b825758fb97860a38ad13122051f9dda Signed-off-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3dd02396517d..4728087c42a5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -754,6 +754,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_CLOSE_WAIT | TCPF_CLOSE))); + if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) + set_bit(SOCK_SUPPORT_ZC, &newsock->flags); sock_graft(sk2, newsock); newsock->state = SS_CONNECTED;