ip: convert tcp_sendmsg() to iov_iter primitives
The patch is actually smaller than it seems: most of it is unindenting the inner loop body in tcp_sendmsg() itself.

The bit in tcp_input.c is going to get reverted very soon - that's what memcpy_from_msg() will become, but not in this commit; let's keep it reasonably contained.

There's one potentially subtle change here: in case of a short copy from userland, mainline tcp_send_syn_data() discards the skb it has allocated and falls back to the normal path, where we'll send as much as possible after rereading the same data again. This patch trims the SYN+data skb instead - that way we don't need to copy from the same place twice.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
cacdc7d2f9
commit
57be5bdad7
@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
|
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
|
||||||
char __user *from, char *to,
|
struct iov_iter *from, char *to,
|
||||||
int copy, int offset)
|
int copy, int offset)
|
||||||
{
|
{
|
||||||
if (skb->ip_summed == CHECKSUM_NONE) {
|
if (skb->ip_summed == CHECKSUM_NONE) {
|
||||||
int err = 0;
|
__wsum csum = 0;
|
||||||
__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
|
if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
|
||||||
if (err)
|
return -EFAULT;
|
||||||
return err;
|
|
||||||
skb->csum = csum_block_add(skb->csum, csum, offset);
|
skb->csum = csum_block_add(skb->csum, csum, offset);
|
||||||
} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
|
} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
|
||||||
if (!access_ok(VERIFY_READ, from, copy) ||
|
if (copy_from_iter_nocache(to, copy, from) != copy)
|
||||||
__copy_from_user_nocache(to, from, copy))
|
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
} else if (copy_from_user(to, from, copy))
|
} else if (copy_from_iter(to, copy, from) != copy)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
|
static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
|
||||||
char __user *from, int copy)
|
struct iov_iter *from, int copy)
|
||||||
{
|
{
|
||||||
int err, offset = skb->len;
|
int err, offset = skb->len;
|
||||||
|
|
||||||
@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
|
static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from,
|
||||||
struct sk_buff *skb,
|
struct sk_buff *skb,
|
||||||
struct page *page,
|
struct page *page,
|
||||||
int off, int copy)
|
int off, int copy)
|
||||||
|
241
net/ipv4/tcp.c
241
net/ipv4/tcp.c
@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
|
|||||||
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
||||||
size_t size)
|
size_t size)
|
||||||
{
|
{
|
||||||
const struct iovec *iov;
|
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
int iovlen, flags, err, copied = 0;
|
int flags, err, copied = 0;
|
||||||
int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
|
int mss_now = 0, size_goal, copied_syn = 0;
|
||||||
bool sg;
|
bool sg;
|
||||||
long timeo;
|
long timeo;
|
||||||
|
|
||||||
@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
|||||||
goto out;
|
goto out;
|
||||||
else if (err)
|
else if (err)
|
||||||
goto out_err;
|
goto out_err;
|
||||||
offset = copied_syn;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
|
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
|
||||||
@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
|||||||
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
||||||
|
|
||||||
/* Ok commence sending. */
|
/* Ok commence sending. */
|
||||||
iovlen = msg->msg_iter.nr_segs;
|
|
||||||
iov = msg->msg_iter.iov;
|
|
||||||
copied = 0;
|
copied = 0;
|
||||||
|
|
||||||
err = -EPIPE;
|
err = -EPIPE;
|
||||||
@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
|||||||
|
|
||||||
sg = !!(sk->sk_route_caps & NETIF_F_SG);
|
sg = !!(sk->sk_route_caps & NETIF_F_SG);
|
||||||
|
|
||||||
while (--iovlen >= 0) {
|
while (iov_iter_count(&msg->msg_iter)) {
|
||||||
size_t seglen = iov->iov_len;
|
int copy = 0;
|
||||||
unsigned char __user *from = iov->iov_base;
|
int max = size_goal;
|
||||||
|
|
||||||
iov++;
|
skb = tcp_write_queue_tail(sk);
|
||||||
if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */
|
if (tcp_send_head(sk)) {
|
||||||
if (offset >= seglen) {
|
if (skb->ip_summed == CHECKSUM_NONE)
|
||||||
offset -= seglen;
|
max = mss_now;
|
||||||
continue;
|
copy = max - skb->len;
|
||||||
}
|
|
||||||
seglen -= offset;
|
|
||||||
from += offset;
|
|
||||||
offset = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (seglen > 0) {
|
if (copy <= 0) {
|
||||||
int copy = 0;
|
|
||||||
int max = size_goal;
|
|
||||||
|
|
||||||
skb = tcp_write_queue_tail(sk);
|
|
||||||
if (tcp_send_head(sk)) {
|
|
||||||
if (skb->ip_summed == CHECKSUM_NONE)
|
|
||||||
max = mss_now;
|
|
||||||
copy = max - skb->len;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (copy <= 0) {
|
|
||||||
new_segment:
|
new_segment:
|
||||||
/* Allocate new segment. If the interface is SG,
|
/* Allocate new segment. If the interface is SG,
|
||||||
* allocate skb fitting to single page.
|
* allocate skb fitting to single page.
|
||||||
*/
|
*/
|
||||||
if (!sk_stream_memory_free(sk))
|
if (!sk_stream_memory_free(sk))
|
||||||
goto wait_for_sndbuf;
|
goto wait_for_sndbuf;
|
||||||
|
|
||||||
skb = sk_stream_alloc_skb(sk,
|
skb = sk_stream_alloc_skb(sk,
|
||||||
select_size(sk, sg),
|
select_size(sk, sg),
|
||||||
sk->sk_allocation);
|
sk->sk_allocation);
|
||||||
if (!skb)
|
if (!skb)
|
||||||
goto wait_for_memory;
|
goto wait_for_memory;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check whether we can use HW checksum.
|
* Check whether we can use HW checksum.
|
||||||
*/
|
*/
|
||||||
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
|
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
|
||||||
skb->ip_summed = CHECKSUM_PARTIAL;
|
skb->ip_summed = CHECKSUM_PARTIAL;
|
||||||
|
|
||||||
skb_entail(sk, skb);
|
skb_entail(sk, skb);
|
||||||
copy = size_goal;
|
copy = size_goal;
|
||||||
max = size_goal;
|
max = size_goal;
|
||||||
|
|
||||||
/* All packets are restored as if they have
|
/* All packets are restored as if they have
|
||||||
* already been sent. skb_mstamp isn't set to
|
* already been sent. skb_mstamp isn't set to
|
||||||
* avoid wrong rtt estimation.
|
* avoid wrong rtt estimation.
|
||||||
*/
|
*/
|
||||||
if (tp->repair)
|
if (tp->repair)
|
||||||
TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
|
TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try to append data to the end of skb. */
|
/* Try to append data to the end of skb. */
|
||||||
if (copy > seglen)
|
if (copy > iov_iter_count(&msg->msg_iter))
|
||||||
copy = seglen;
|
copy = iov_iter_count(&msg->msg_iter);
|
||||||
|
|
||||||
/* Where to copy to? */
|
/* Where to copy to? */
|
||||||
if (skb_availroom(skb) > 0) {
|
if (skb_availroom(skb) > 0) {
|
||||||
/* We have some space in skb head. Superb! */
|
/* We have some space in skb head. Superb! */
|
||||||
copy = min_t(int, copy, skb_availroom(skb));
|
copy = min_t(int, copy, skb_availroom(skb));
|
||||||
err = skb_add_data_nocache(sk, skb, from, copy);
|
err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
|
||||||
if (err)
|
if (err)
|
||||||
goto do_fault;
|
goto do_fault;
|
||||||
} else {
|
} else {
|
||||||
bool merge = true;
|
bool merge = true;
|
||||||
int i = skb_shinfo(skb)->nr_frags;
|
int i = skb_shinfo(skb)->nr_frags;
|
||||||
struct page_frag *pfrag = sk_page_frag(sk);
|
struct page_frag *pfrag = sk_page_frag(sk);
|
||||||
|
|
||||||
if (!sk_page_frag_refill(sk, pfrag))
|
if (!sk_page_frag_refill(sk, pfrag))
|
||||||
goto wait_for_memory;
|
goto wait_for_memory;
|
||||||
|
|
||||||
if (!skb_can_coalesce(skb, i, pfrag->page,
|
if (!skb_can_coalesce(skb, i, pfrag->page,
|
||||||
pfrag->offset)) {
|
pfrag->offset)) {
|
||||||
if (i == MAX_SKB_FRAGS || !sg) {
|
if (i == MAX_SKB_FRAGS || !sg) {
|
||||||
tcp_mark_push(tp, skb);
|
tcp_mark_push(tp, skb);
|
||||||
goto new_segment;
|
goto new_segment;
|
||||||
}
|
|
||||||
merge = false;
|
|
||||||
}
|
}
|
||||||
|
merge = false;
|
||||||
copy = min_t(int, copy, pfrag->size - pfrag->offset);
|
|
||||||
|
|
||||||
if (!sk_wmem_schedule(sk, copy))
|
|
||||||
goto wait_for_memory;
|
|
||||||
|
|
||||||
err = skb_copy_to_page_nocache(sk, from, skb,
|
|
||||||
pfrag->page,
|
|
||||||
pfrag->offset,
|
|
||||||
copy);
|
|
||||||
if (err)
|
|
||||||
goto do_error;
|
|
||||||
|
|
||||||
/* Update the skb. */
|
|
||||||
if (merge) {
|
|
||||||
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
|
|
||||||
} else {
|
|
||||||
skb_fill_page_desc(skb, i, pfrag->page,
|
|
||||||
pfrag->offset, copy);
|
|
||||||
get_page(pfrag->page);
|
|
||||||
}
|
|
||||||
pfrag->offset += copy;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!copied)
|
copy = min_t(int, copy, pfrag->size - pfrag->offset);
|
||||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
|
|
||||||
|
|
||||||
tp->write_seq += copy;
|
if (!sk_wmem_schedule(sk, copy))
|
||||||
TCP_SKB_CB(skb)->end_seq += copy;
|
goto wait_for_memory;
|
||||||
tcp_skb_pcount_set(skb, 0);
|
|
||||||
|
|
||||||
from += copy;
|
err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
|
||||||
copied += copy;
|
pfrag->page,
|
||||||
if ((seglen -= copy) == 0 && iovlen == 0) {
|
pfrag->offset,
|
||||||
tcp_tx_timestamp(sk, skb);
|
copy);
|
||||||
goto out;
|
if (err)
|
||||||
}
|
|
||||||
|
|
||||||
if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (forced_push(tp)) {
|
|
||||||
tcp_mark_push(tp, skb);
|
|
||||||
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
|
|
||||||
} else if (skb == tcp_send_head(sk))
|
|
||||||
tcp_push_one(sk, mss_now);
|
|
||||||
continue;
|
|
||||||
|
|
||||||
wait_for_sndbuf:
|
|
||||||
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
|
||||||
wait_for_memory:
|
|
||||||
if (copied)
|
|
||||||
tcp_push(sk, flags & ~MSG_MORE, mss_now,
|
|
||||||
TCP_NAGLE_PUSH, size_goal);
|
|
||||||
|
|
||||||
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
|
|
||||||
goto do_error;
|
goto do_error;
|
||||||
|
|
||||||
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
/* Update the skb. */
|
||||||
|
if (merge) {
|
||||||
|
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
|
||||||
|
} else {
|
||||||
|
skb_fill_page_desc(skb, i, pfrag->page,
|
||||||
|
pfrag->offset, copy);
|
||||||
|
get_page(pfrag->page);
|
||||||
|
}
|
||||||
|
pfrag->offset += copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!copied)
|
||||||
|
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
|
||||||
|
|
||||||
|
tp->write_seq += copy;
|
||||||
|
TCP_SKB_CB(skb)->end_seq += copy;
|
||||||
|
tcp_skb_pcount_set(skb, 0);
|
||||||
|
|
||||||
|
copied += copy;
|
||||||
|
if (!iov_iter_count(&msg->msg_iter)) {
|
||||||
|
tcp_tx_timestamp(sk, skb);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (forced_push(tp)) {
|
||||||
|
tcp_mark_push(tp, skb);
|
||||||
|
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
|
||||||
|
} else if (skb == tcp_send_head(sk))
|
||||||
|
tcp_push_one(sk, mss_now);
|
||||||
|
continue;
|
||||||
|
|
||||||
|
wait_for_sndbuf:
|
||||||
|
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
||||||
|
wait_for_memory:
|
||||||
|
if (copied)
|
||||||
|
tcp_push(sk, flags & ~MSG_MORE, mss_now,
|
||||||
|
TCP_NAGLE_PUSH, size_goal);
|
||||||
|
|
||||||
|
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
|
||||||
|
goto do_error;
|
||||||
|
|
||||||
|
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
|
|||||||
if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
|
if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
|
||||||
goto err_free;
|
goto err_free;
|
||||||
|
|
||||||
if (memcpy_from_msg(skb_put(skb, size), msg, size))
|
if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size)
|
||||||
goto err_free;
|
goto err_free;
|
||||||
|
|
||||||
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
|
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
|
||||||
|
@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
|||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
struct tcp_fastopen_request *fo = tp->fastopen_req;
|
struct tcp_fastopen_request *fo = tp->fastopen_req;
|
||||||
int syn_loss = 0, space, err = 0;
|
int syn_loss = 0, space, err = 0, copied;
|
||||||
unsigned long last_syn_loss = 0;
|
unsigned long last_syn_loss = 0;
|
||||||
struct sk_buff *syn_data;
|
struct sk_buff *syn_data;
|
||||||
|
|
||||||
@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
|||||||
goto fallback;
|
goto fallback;
|
||||||
syn_data->ip_summed = CHECKSUM_PARTIAL;
|
syn_data->ip_summed = CHECKSUM_PARTIAL;
|
||||||
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
|
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
|
||||||
if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
|
copied = copy_from_iter(skb_put(syn_data, space), space,
|
||||||
fo->data->msg_iter.iov, 0, space))) {
|
&fo->data->msg_iter);
|
||||||
|
if (unlikely(!copied)) {
|
||||||
kfree_skb(syn_data);
|
kfree_skb(syn_data);
|
||||||
goto fallback;
|
goto fallback;
|
||||||
}
|
}
|
||||||
|
if (copied != space) {
|
||||||
|
skb_trim(syn_data, copied);
|
||||||
|
space = copied;
|
||||||
|
}
|
||||||
|
|
||||||
/* No more data pending in inet_wait_for_connect() */
|
/* No more data pending in inet_wait_for_connect() */
|
||||||
if (space == fo->size)
|
if (space == fo->size)
|
||||||
|
Loading…
Reference in New Issue
Block a user