ipv4, ipv6: Use splice_eof() to flush
Allow splice to undo the effects of MSG_MORE after prematurely ending a splice/sendfile due to getting an EOF condition (->splice_read() returned 0) after splice had called sendmsg() with MSG_MORE set when the user didn't set MSG_MORE. For UDP, a pending packet will not be emitted if the socket is closed before it is flushed; with this change, it will be flushed by ->splice_eof(). For TCP, it's not clear that MSG_MORE is actually effective. Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/ Signed-off-by: David Howells <dhowells@redhat.com> cc: Kuniyuki Iwashima <kuniyu@amazon.com> cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com> cc: David Ahern <dsahern@kernel.org> cc: Jens Axboe <axboe@kernel.dk> cc: Matthew Wilcox <willy@infradead.org> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
d4c1e80b0d
commit
1d7e4538a5
@ -35,6 +35,7 @@ void __inet_accept(struct socket *sock, struct socket *newsock,
|
||||
struct sock *newsk);
|
||||
int inet_send_prepare(struct sock *sk);
|
||||
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
|
||||
void inet_splice_eof(struct socket *sock);
|
||||
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
|
||||
size_t size, int flags);
|
||||
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
||||
|
@ -327,6 +327,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
|
||||
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
|
||||
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
|
||||
size_t size, struct ubuf_info *uarg);
|
||||
void tcp_splice_eof(struct socket *sock);
|
||||
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
|
||||
int flags);
|
||||
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
|
||||
|
@ -278,6 +278,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
|
||||
int udp_err(struct sk_buff *, u32);
|
||||
int udp_abort(struct sock *sk, int err);
|
||||
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
|
||||
void udp_splice_eof(struct socket *sock);
|
||||
int udp_push_pending_frames(struct sock *sk);
|
||||
void udp_flush_pending_frames(struct sock *sk);
|
||||
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
|
||||
|
@ -831,6 +831,21 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
|
||||
}
|
||||
EXPORT_SYMBOL(inet_sendmsg);
|
||||
|
||||
/*
 * inet_splice_eof - socket-level ->splice_eof() handler for inet sockets.
 * @sock: socket that was the destination of the splice.
 *
 * Forwards the end-of-splice notification to the protocol's own
 * ->splice_eof() handler, if it provides one.  Does nothing when
 * inet_send_prepare() reports failure or when the protocol has no handler.
 * No status is returned to the caller.
 */
void inet_splice_eof(struct socket *sock)
|
||||
{
|
||||
const struct proto *prot;
|
||||
struct sock *sk = sock->sk;
|
||||
|
||||
/* A non-zero result from inet_send_prepare() means: silently give up. */
if (unlikely(inet_send_prepare(sk)))
|
||||
return;
|
||||
|
||||
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
|
||||
/* Read the pointer once so the NULL test and the call use the same proto. */
prot = READ_ONCE(sk->sk_prot);
|
||||
/* The handler is optional; protocols without one are left untouched. */
if (prot->splice_eof)
|
||||
prot->splice_eof(sock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inet_splice_eof);
|
||||
|
||||
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
|
||||
size_t size, int flags)
|
||||
{
|
||||
@ -1050,6 +1065,7 @@ const struct proto_ops inet_stream_ops = {
|
||||
#ifdef CONFIG_MMU
|
||||
.mmap = tcp_mmap,
|
||||
#endif
|
||||
.splice_eof = inet_splice_eof,
|
||||
.sendpage = inet_sendpage,
|
||||
.splice_read = tcp_splice_read,
|
||||
.read_sock = tcp_read_sock,
|
||||
@ -1084,6 +1100,7 @@ const struct proto_ops inet_dgram_ops = {
|
||||
.read_skb = udp_read_skb,
|
||||
.recvmsg = inet_recvmsg,
|
||||
.mmap = sock_no_mmap,
|
||||
.splice_eof = inet_splice_eof,
|
||||
.sendpage = inet_sendpage,
|
||||
.set_peek_off = sk_set_peek_off,
|
||||
#ifdef CONFIG_COMPAT
|
||||
@ -1115,6 +1132,7 @@ static const struct proto_ops inet_sockraw_ops = {
|
||||
.sendmsg = inet_sendmsg,
|
||||
.recvmsg = inet_recvmsg,
|
||||
.mmap = sock_no_mmap,
|
||||
.splice_eof = inet_splice_eof,
|
||||
.sendpage = inet_sendpage,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = inet_compat_ioctl,
|
||||
|
@ -1371,6 +1371,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
|
||||
}
|
||||
EXPORT_SYMBOL(tcp_sendmsg);
|
||||
|
||||
/*
 * tcp_splice_eof - TCP ->splice_eof() handler.
 * @sock: socket that was the destination of the splice.
 *
 * If the write queue has a tail skb, takes the socket lock and calls
 * tcp_push() with a flags argument of 0, using the current MSS/size goal
 * and the socket's nonagle setting.  Returns without locking when
 * tcp_write_queue_tail() yields nothing.
 */
void tcp_splice_eof(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int mss_now, size_goal;
|
||||
|
||||
/* Unlocked check: no tail skb, so presumably nothing is queued to push. */
if (!tcp_write_queue_tail(sk))
|
||||
return;
|
||||
|
||||
lock_sock(sk);
|
||||
/* Flags are passed as 0 here and to tcp_push() below (no MSG_MORE). */
mss_now = tcp_send_mss(sk, &size_goal, 0);
|
||||
tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
|
||||
release_sock(sk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tcp_splice_eof);
|
||||
|
||||
/*
|
||||
* Handle reading urgent data. BSD has very simple semantics for
|
||||
* this, no blocking and very strange errors 8)
|
||||
|
@ -3116,6 +3116,7 @@ struct proto tcp_prot = {
|
||||
.keepalive = tcp_set_keepalive,
|
||||
.recvmsg = tcp_recvmsg,
|
||||
.sendmsg = tcp_sendmsg,
|
||||
.splice_eof = tcp_splice_eof,
|
||||
.sendpage = tcp_sendpage,
|
||||
.backlog_rcv = tcp_v4_do_rcv,
|
||||
.release_cb = tcp_release_cb,
|
||||
|
@ -1324,6 +1324,21 @@ do_confirm:
|
||||
}
|
||||
EXPORT_SYMBOL(udp_sendmsg);
|
||||
|
||||
/*
 * udp_splice_eof - UDP (IPv4) ->splice_eof() handler.
 * @sock: socket that was the destination of the splice.
 *
 * Pushes out pending frames via udp_push_pending_frames() when the socket
 * has data pending and corkflag is not set.  The return value of the push
 * is not checked.
 */
void udp_splice_eof(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct udp_sock *up = udp_sk(sk);
|
||||
|
||||
/* Unlocked early exit: nothing pending, or corkflag is set. */
if (!up->pending || READ_ONCE(up->corkflag))
|
||||
return;
|
||||
|
||||
lock_sock(sk);
|
||||
/*
 * Same condition re-tested under the socket lock; presumably the state
 * may have changed between the unlocked check and acquiring the lock.
 */
if (up->pending && !READ_ONCE(up->corkflag))
|
||||
udp_push_pending_frames(sk);
|
||||
release_sock(sk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(udp_splice_eof);
|
||||
|
||||
int udp_sendpage(struct sock *sk, struct page *page, int offset,
|
||||
size_t size, int flags)
|
||||
{
|
||||
@ -2918,6 +2933,7 @@ struct proto udp_prot = {
|
||||
.getsockopt = udp_getsockopt,
|
||||
.sendmsg = udp_sendmsg,
|
||||
.recvmsg = udp_recvmsg,
|
||||
.splice_eof = udp_splice_eof,
|
||||
.sendpage = udp_sendpage,
|
||||
.release_cb = ip4_datagram_release_cb,
|
||||
.hash = udp_lib_hash,
|
||||
|
@ -695,6 +695,7 @@ const struct proto_ops inet6_stream_ops = {
|
||||
#ifdef CONFIG_MMU
|
||||
.mmap = tcp_mmap,
|
||||
#endif
|
||||
.splice_eof = inet_splice_eof,
|
||||
.sendpage = inet_sendpage,
|
||||
.sendmsg_locked = tcp_sendmsg_locked,
|
||||
.sendpage_locked = tcp_sendpage_locked,
|
||||
|
@ -2150,6 +2150,7 @@ struct proto tcpv6_prot = {
|
||||
.keepalive = tcp_set_keepalive,
|
||||
.recvmsg = tcp_recvmsg,
|
||||
.sendmsg = tcp_sendmsg,
|
||||
.splice_eof = tcp_splice_eof,
|
||||
.sendpage = tcp_sendpage,
|
||||
.backlog_rcv = tcp_v6_do_rcv,
|
||||
.release_cb = tcp_release_cb,
|
||||
|
@ -1653,6 +1653,20 @@ do_confirm:
|
||||
}
|
||||
EXPORT_SYMBOL(udpv6_sendmsg);
|
||||
|
||||
/*
 * udpv6_splice_eof - UDP (IPv6) ->splice_eof() handler.
 * @sock: socket that was the destination of the splice.
 *
 * Pushes out pending frames via udp_v6_push_pending_frames() when the
 * socket has data pending and corkflag is not set.  The return value of
 * the push is not checked.
 */
static void udpv6_splice_eof(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct udp_sock *up = udp_sk(sk);
|
||||
|
||||
/* Unlocked early exit: nothing pending, or corkflag is set. */
if (!up->pending || READ_ONCE(up->corkflag))
|
||||
return;
|
||||
|
||||
lock_sock(sk);
|
||||
/*
 * Same condition re-tested under the socket lock; presumably the state
 * may have changed between the unlocked check and acquiring the lock.
 */
if (up->pending && !READ_ONCE(up->corkflag))
|
||||
udp_v6_push_pending_frames(sk);
|
||||
release_sock(sk);
|
||||
}
|
||||
|
||||
void udpv6_destroy_sock(struct sock *sk)
|
||||
{
|
||||
struct udp_sock *up = udp_sk(sk);
|
||||
@ -1764,6 +1778,7 @@ struct proto udpv6_prot = {
|
||||
.getsockopt = udpv6_getsockopt,
|
||||
.sendmsg = udpv6_sendmsg,
|
||||
.recvmsg = udpv6_recvmsg,
|
||||
.splice_eof = udpv6_splice_eof,
|
||||
.release_cb = ip6_datagram_release_cb,
|
||||
.hash = udp_lib_hash,
|
||||
.unhash = udp_lib_unhash,
|
||||
|
Loading…
Reference in New Issue
Block a user