diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6a50e1d0529c..9f57736fe15e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -102,7 +102,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct iov_iter iov_iter; unsigned out, in; size_t nbytes; - size_t len; + size_t iov_len, payload_len; int head; spin_lock_bh(&vsock->send_pkt_list_lock); @@ -147,8 +147,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + iov_len = iov_length(&vq->iov[out], in); + if (iov_len < sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Buffer len [%zu] too small\n", iov_len); + break; + } + + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len); + payload_len = pkt->len - pkt->off; + + /* If the packet is greater than the space available in the + * buffer, we split it using multiple buffers. + */ + if (payload_len > iov_len - sizeof(pkt->hdr)) + payload_len = iov_len - sizeof(pkt->hdr); + + /* Set the correct length in the header */ + pkt->hdr.len = cpu_to_le32(payload_len); nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); if (nbytes != sizeof(pkt->hdr)) { @@ -157,33 +173,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { + nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len, + &iov_iter); + if (nbytes != payload_len) { virtio_transport_free_pkt(pkt); vq_err(vq, "Faulted on copying pkt buf\n"); break; } - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); added = true; - if (pkt->reply) { - int val; - - val = atomic_dec_return(&vsock->queued_replies); - - /* Do we have resources to resume tx processing? */ - if (val + 1 == tx_vq->num) - restart_tx = true; - } - /* Deliver to monitoring devices all correctly transmitted * packets. */ virtio_transport_deliver_tap_pkt(pkt); - total_len += pkt->len; - virtio_transport_free_pkt(pkt); + pkt->off += payload_len; + total_len += payload_len; + + /* If we didn't send all the payload we can requeue the packet + * to send it with the next available buffer. + */ + if (pkt->off < pkt->len) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + } else { + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx + * processing? + */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); if (added) vhost_signal(&vsock->dev, vq); @@ -329,6 +359,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, return NULL; } + pkt->buf_len = pkt->len; + nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); if (nbytes != pkt->len) { vq_err(vq, "Expected %u byte payload, got %zu bytes\n", diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index e223e2632edd..4c7781f4b29b 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -35,13 +35,14 @@ struct virtio_vsock_sock { /* Protected by tx_lock */ u32 tx_cnt; - u32 buf_alloc; u32 peer_fwd_cnt; u32 peer_buf_alloc; /* Protected by rx_lock */ u32 fwd_cnt; + u32 last_fwd_cnt; u32 rx_bytes; + u32 buf_alloc; struct list_head rx_queue; }; @@ -52,6 +53,7 @@ struct virtio_vsock_pkt { /* socket refcnt not held, only use for cancellation */ struct vsock_sock *vsk; void *buf; + u32 buf_len; u32 len; u32 off; bool reply; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 0815d1357861..082a30936690 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) break; } + pkt->buf_len = buf_len; pkt->len = buf_len; sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6f1a8aff65c5..94cc0fa3e848 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* How long to wait for graceful shutdown of a connection */ #define VSOCK_CLOSE_TIMEOUT (8 * HZ) +/* Threshold for detecting small packets to copy */ +#define GOOD_COPY_LEN 128 + static const struct virtio_transport *virtio_transport_get_ops(void) { const struct vsock_transport *t = vsock_core_get_transport(); @@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->buf = kmalloc(len, GFP_KERNEL); if (!pkt->buf) goto out_pkt; + + pkt->buf_len = len; + err = memcpy_from_msg(pkt->buf, info->msg, len); if (err) goto out; @@ -91,8 +97,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; + + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -132,8 +147,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; @@ -166,8 +181,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, vvs = vsk->trans; /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; /* virtio_transport_get_credit might return less than pkt_len credit */ pkt_len = virtio_transport_get_credit(vvs, pkt_len); @@ -204,10 +219,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) { - spin_lock_bh(&vvs->tx_lock); + spin_lock_bh(&vvs->rx_lock); + vvs->last_fwd_cnt = vvs->fwd_cnt; pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); - spin_unlock_bh(&vvs->tx_lock); + spin_unlock_bh(&vvs->rx_lock); } EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); @@ -255,6 +271,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_sock *vvs = vsk->trans; struct virtio_vsock_pkt *pkt; size_t bytes, total = 0; + u32 free_space; int err = -EFAULT; spin_lock_bh(&vvs->rx_lock); @@ -285,11 +302,19 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, virtio_transport_free_pkt(pkt); } } + + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); + spin_unlock_bh(&vvs->rx_lock); - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); + /* We send a credit update only when the space available seen + * by the transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + */ + if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + virtio_transport_send_credit_update(vsk, + VIRTIO_VSOCK_TYPE_STREAM, + NULL); + } return total; @@ -841,24 +866,60 @@ destroy: return err; } +static void +virtio_transport_recv_enqueue(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + bool free_pkt = false; + + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + + spin_lock_bh(&vvs->rx_lock); + + virtio_transport_inc_rx_pkt(vvs, pkt); + + /* Try to copy small packets into the buffer of last packet queued, + * to avoid wasting memory queueing the entire buffer with a small + * payload. + */ + if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { + struct virtio_vsock_pkt *last_pkt; + + last_pkt = list_last_entry(&vvs->rx_queue, + struct virtio_vsock_pkt, list); + + /* If there is space in the last packet queued, we copy the + * new packet in its buffer. + */ + if (pkt->len <= last_pkt->buf_len - last_pkt->len) { + memcpy(last_pkt->buf + last_pkt->len, pkt->buf, + pkt->len); + last_pkt->len += pkt->len; + free_pkt = true; + goto out; + } + } + + list_add_tail(&pkt->list, &vvs->rx_queue); + +out: + spin_unlock_bh(&vvs->rx_lock); + if (free_pkt) + virtio_transport_free_pkt(pkt); +} + static int virtio_transport_recv_connected(struct sock *sk, struct virtio_vsock_pkt *pkt) { struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_vsock_sock *vvs = vsk->trans; int err = 0; switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - - spin_lock_bh(&vvs->rx_lock); - virtio_transport_inc_rx_pkt(vvs, pkt); - list_add_tail(&pkt->list, &vvs->rx_queue); - spin_unlock_bh(&vvs->rx_lock); - + virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: