diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 231ad91a919d..fe7f314d65c9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -132,6 +132,14 @@ struct virtnet_interrupt_coalesce { u32 max_usecs; }; +/* The dma information of pages allocated at a time. */ +struct virtnet_rq_dma { + dma_addr_t addr; + u32 ref; + u16 len; + u16 need_sync; +}; + /* Internal representation of a send virtqueue */ struct send_queue { /* Virtqueue associated with this send _queue */ @@ -185,6 +193,12 @@ struct receive_queue { char name[16]; struct xdp_rxq_info xdp_rxq; + + /* Record the last dma info to free after new pages is allocated. */ + struct virtnet_rq_dma *last_dma; + + /* Do dma by self */ + bool do_dma; }; /* This structure can contain rss message with maximum settings for indirection table and keysize @@ -580,6 +594,156 @@ ok: return skb; } +static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) +{ + struct page *page = virt_to_head_page(buf); + struct virtnet_rq_dma *dma; + void *head; + int offset; + + head = page_address(page); + + dma = head; + + --dma->ref; + + if (dma->ref) { + if (dma->need_sync && len) { + offset = buf - (head + sizeof(*dma)); + + virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, + len, DMA_FROM_DEVICE); + } + + return; + } + + virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + put_page(page); +} + +static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) +{ + void *buf; + + buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); + if (buf && rq->do_dma) + virtnet_rq_unmap(rq, buf, *len); + + return buf; +} + +static void *virtnet_rq_detach_unused_buf(struct receive_queue *rq) +{ + void *buf; + + buf = virtqueue_detach_unused_buf(rq->vq); + if (buf && rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); + + return buf; +} + +static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) +{ + struct virtnet_rq_dma *dma; + dma_addr_t addr; + u32 offset; + void *head; + + if (!rq->do_dma) { + sg_init_one(rq->sg, buf, len); + return; + } + + head = page_address(rq->alloc_frag.page); + + offset = buf - head; + + dma = head; + + addr = dma->addr - sizeof(*dma) + offset; + + sg_init_table(rq->sg, 1); + rq->sg[0].dma_address = addr; + rq->sg[0].length = len; +} + +static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) +{ + struct page_frag *alloc_frag = &rq->alloc_frag; + struct virtnet_rq_dma *dma; + void *buf, *head; + dma_addr_t addr; + + if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) + return NULL; + + head = page_address(alloc_frag->page); + + if (rq->do_dma) { + dma = head; + + /* new pages */ + if (!alloc_frag->offset) { + if (rq->last_dma) { + /* Now, the new page is allocated, the last dma + * will not be used. So the dma can be unmapped + * if the ref is 0. + */ + virtnet_rq_unmap(rq, rq->last_dma, 0); + rq->last_dma = NULL; + } + + dma->len = alloc_frag->size - sizeof(*dma); + + addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, + dma->len, DMA_FROM_DEVICE, 0); + if (virtqueue_dma_mapping_error(rq->vq, addr)) + return NULL; + + dma->addr = addr; + dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); + + /* Add a reference to dma to prevent the entire dma from + * being released during error handling. This reference + * will be freed after the pages are no longer used. 
+ */ + get_page(alloc_frag->page); + dma->ref = 1; + alloc_frag->offset = sizeof(*dma); + + rq->last_dma = dma; + } + + ++dma->ref; + } + + buf = head + alloc_frag->offset; + + get_page(alloc_frag->page); + alloc_frag->offset += size; + + return buf; +} + +static void virtnet_rq_set_premapped(struct virtnet_info *vi) +{ + int i; + + /* disable for big mode */ + if (!vi->mergeable_rx_bufs && vi->big_packets) + return; + + for (i = 0; i < vi->max_queue_pairs; i++) { + if (virtqueue_set_dma_premapped(vi->rq[i].vq)) + continue; + + vi->rq[i].do_dma = true; + } +} + static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { unsigned int len; @@ -935,7 +1099,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, void *buf; int off; - buf = virtqueue_get_buf(rq->vq, &buflen); + buf = virtnet_rq_get_buf(rq, &buflen, NULL); if (unlikely(!buf)) goto err_buf; @@ -1155,7 +1319,7 @@ static void mergeable_buf_free(struct receive_queue *rq, int num_buf, int len; while (num_buf-- > 1) { - buf = virtqueue_get_buf(rq->vq, &len); + buf = virtnet_rq_get_buf(rq, &len, NULL); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); @@ -1263,7 +1427,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, return -EINVAL; while (--*num_buf > 0) { - buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); + buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, *num_buf, @@ -1492,7 +1656,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, while (--num_buf) { int num_skb_frags; - buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); + buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, num_buf, @@ -1651,7 +1815,6 @@ frame_err: static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { - struct page_frag *alloc_frag = &rq->alloc_frag; char *buf; unsigned int xdp_headroom = virtnet_get_headroom(vi); void *ctx = (void *)(unsigned long)xdp_headroom; @@ -1660,17 +1823,21 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, len = SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) + + buf = virtnet_rq_alloc(rq, len, gfp); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; - get_page(alloc_frag->page); - alloc_frag->offset += len; - sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom, - vi->hdr_len + GOOD_PACKET_LEN); + virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, + vi->hdr_len + GOOD_PACKET_LEN); + err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); - if (err < 0) + if (err < 0) { + if (rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); + } + return err; } @@ -1747,23 +1914,22 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); - char *buf; - void *ctx; - int err; unsigned int len, hole; + void *ctx; + char *buf; + int err; /* Extra tailroom is needed to satisfy XDP's assumption. This * means rx frags coalescing won't work, but consider we've * disabled GSO for XDP, it won't be a big issue. 
*/ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); - if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) + + buf = virtnet_rq_alloc(rq, len + room, gfp); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; buf += headroom; /* advance address leaving hole at front of pkt */ - get_page(alloc_frag->page); - alloc_frag->offset += len + room; hole = alloc_frag->size - alloc_frag->offset; if (hole < len + room) { /* To avoid internal fragmentation, if there is very likely not @@ -1777,11 +1943,15 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, alloc_frag->offset += hole; } - sg_init_one(rq->sg, buf, len); + virtnet_rq_init_one_sg(rq, buf, len); + ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); - if (err < 0) + if (err < 0) { + if (rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); + } return err; } @@ -1902,13 +2072,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget, void *ctx; while (stats.packets < budget && - (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { + (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); stats.packets++; } } else { while (stats.packets < budget && - (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { + (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); stats.packets++; } @@ -3808,8 +3978,11 @@ static void free_receive_page_frags(struct virtnet_info *vi) { int i; for (i = 0; i < vi->max_queue_pairs; i++) - if (vi->rq[i].alloc_frag.page) + if (vi->rq[i].alloc_frag.page) { + if (vi->rq[i].do_dma && vi->rq[i].last_dma) + virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); put_page(vi->rq[i].alloc_frag.page); + } } static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) @@ -3846,9 +4019,10 @@ static void free_unused_bufs(struct virtnet_info *vi) } for (i = 0; i < vi->max_queue_pairs; i++) { - struct virtqueue *vq = vi->rq[i].vq; - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) - virtnet_rq_free_unused_buf(vq, buf); + struct receive_queue *rq = &vi->rq[i]; + + while ((buf = virtnet_rq_detach_unused_buf(rq)) != NULL) + virtnet_rq_free_unused_buf(rq->vq, buf); cond_resched(); } } @@ -4022,6 +4196,8 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; + virtnet_rq_set_premapped(vi); + cpus_read_lock(); virtnet_set_affinity(vi); cpus_read_unlock(); diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index b53420e874ac..ca56242972b3 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -100,9 +100,6 @@ struct mlx5_vdpa_dev { bool suspended; }; -int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); -int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid); -int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d343af4fa60e..76d41058add9 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "vdpa_sim.h" @@ 
-410,6 +411,11 @@ static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) return vdpasim->dev_attr.supported_features; } +static u64 vdpasim_get_backend_features(const struct vdpa_device *vdpa) +{ + return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); +} + static int vdpasim_set_driver_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -733,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .get_vq_align = vdpasim_get_vq_align, .get_vq_group = vdpasim_get_vq_group, .get_device_features = vdpasim_get_device_features, + .get_backend_features = vdpasim_get_backend_features, .set_driver_features = vdpasim_set_driver_features, .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, @@ -770,6 +777,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_vq_align = vdpasim_get_vq_align, .get_vq_group = vdpasim_get_vq_group, .get_device_features = vdpasim_get_device_features, + .get_backend_features = vdpasim_get_backend_features, .set_driver_features = vdpasim_set_driver_features, .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b43e8680eee8..78379ffd2336 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -403,6 +403,17 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) return 0; } +static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->get_backend_features) + return 0; + else + return ops->get_backend_features(vdpa); +} + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -680,7 +691,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, return -EFAULT; if (features & ~(VHOST_VDPA_BACKEND_FEATURES | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | - BIT_ULL(VHOST_BACKEND_F_RESUME))) + BIT_ULL(VHOST_BACKEND_F_RESUME) | + BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) return -EOPNOTSUPP; if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && !vhost_vdpa_can_suspend(v)) @@ -741,6 +753,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (vhost_vdpa_can_resume(v)) features |= BIT_ULL(VHOST_BACKEND_F_RESUME); + features |= vhost_vdpa_get_backend_features(v); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c5310eaf8b46..51d8f3299c10 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -172,6 +172,14 @@ struct vring_virtqueue { /* Host publishes avail event idx */ bool event; + /* Do DMA mapping by driver */ + bool premapped; + + /* Do unmap or not for desc. Just when premapped is False and + * use_dma_api is true, this is true. + */ + bool do_unmap; + /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -355,10 +363,14 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) } /* Map one sg entry. 
*/ -static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, - struct scatterlist *sg, - enum dma_data_direction direction) +static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, + enum dma_data_direction direction, dma_addr_t *addr) { + if (vq->premapped) { + *addr = sg_dma_address(sg); + return 0; + } + if (!vq->use_dma_api) { /* * If DMA is not used, KMSAN doesn't know that the scatterlist @@ -366,7 +378,8 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, * depending on the direction. */ kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); - return (dma_addr_t)sg_phys(sg); + *addr = (dma_addr_t)sg_phys(sg); + return 0; } /* @@ -374,9 +387,14 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ - return dma_map_page(vring_dma_dev(vq), + *addr = dma_map_page(vring_dma_dev(vq), sg_page(sg), sg->offset, sg->length, direction); + + if (dma_mapping_error(vring_dma_dev(vq), *addr)) + return -ENOMEM; + + return 0; } static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, @@ -427,7 +445,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) + if (!vq->do_unmap) return; flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); @@ -445,18 +463,21 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; - if (!vq->use_dma_api) - goto out; - flags = extra[i].flags; if (flags & VRING_DESC_F_INDIRECT) { + if (!vq->use_dma_api) + goto out; + dma_unmap_single(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!vq->do_unmap) + goto out; + dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, @@ -588,8 +609,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) goto unmap_release; prev = i; @@ -603,8 +625,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) goto unmap_release; prev = i; @@ -620,7 +643,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, } /* Last one doesn't continue. 
*/ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); - if (!indirect && vq->use_dma_api) + if (!indirect && vq->do_unmap) vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; @@ -629,8 +652,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, dma_addr_t addr = vring_map_single( vq, desc, total_sg * sizeof(struct vring_desc), DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_indirect; + goto unmap_release; + } virtqueue_add_desc_split(_vq, vq->split.vring.desc, head, addr, @@ -696,6 +723,7 @@ unmap_release: i = vring_unmap_one_split(vq, i); } +free_indirect: if (indirect) kfree(desc); @@ -774,8 +802,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split_indirect(vq, &indir_desc[j]); + if (vq->do_unmap) { + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one_split_indirect(vq, &indir_desc[j]); + } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; @@ -1195,17 +1225,20 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) - return; - flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { + if (!vq->use_dma_api) + return; + dma_unmap_single(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!vq->do_unmap) + return; + dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? @@ -1218,7 +1251,7 @@ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) + if (!vq->do_unmap) return; flags = le16_to_cpu(desc->flags); @@ -1279,9 +1312,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; desc[i].flags = cpu_to_le16(n < out_sgs ? @@ -1296,15 +1328,19 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, addr = vring_map_single(vq, desc, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_desc; + goto unmap_release; + } vq->packed.vring.desc[head].addr = cpu_to_le64(addr); vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); - if (vq->use_dma_api) { + if (vq->do_unmap) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); @@ -1355,6 +1391,7 @@ unmap_release: for (i = 0; i < err_idx; i++) vring_unmap_desc_packed(vq, &desc[i]); +free_desc: kfree(desc); END_USE(vq); @@ -1426,9 +1463,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, n < out_sgs ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; flags = cpu_to_le16(vq->packed.avail_used_flags | @@ -1443,7 +1481,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->do_unmap)) { vq->packed.desc_extra[curr].addr = addr; vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags = @@ -1461,7 +1499,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, } } - if (i < head) + if (i <= head) vq->packed.avail_wrap_counter ^= 1; /* We're using some buffers from the free list. */ @@ -1577,7 +1615,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->do_unmap)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, @@ -1594,7 +1632,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->do_unmap) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) @@ -2052,6 +2090,8 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->packed_ring = true; vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); + vq->premapped = false; + vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2112,6 +2152,43 @@ err_ring: return -ENOMEM; } +static int virtqueue_disable_and_recycle(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + void *buf; + int err; + + if (!vq->we_own_ring) + return -EPERM; + + if (!vdev->config->disable_vq_and_reset) + return -ENOENT; + + if (!vdev->config->enable_vq_after_reset) + return -ENOENT; + + err = vdev->config->disable_vq_and_reset(_vq); + if (err) + return err; + + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) + recycle(_vq, buf); + + return 0; +} + +static int virtqueue_enable_after_reset(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + + if (vdev->config->enable_vq_after_reset(_vq)) + return -EBUSY; + + return 0; +} /* * Generic functions and exported symbols. @@ -2237,6 +2314,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); +/** + * virtqueue_dma_dev - get the dma dev + * @_vq: the struct virtqueue we're talking about. + * + * Returns the dma dev. That can been used for dma api. + */ +struct device *virtqueue_dma_dev(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->use_dma_api) + return vring_dma_dev(vq); + else + return NULL; +} +EXPORT_SYMBOL_GPL(virtqueue_dma_dev); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @_vq: the struct virtqueue @@ -2541,6 +2635,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, #endif vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); + vq->premapped = false; + vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2619,7 +2715,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. 
 * @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
 *
 * When it is really necessary to create a new vring, it will set the current vq
 * into the reset state. Then call the passed callback to recycle the buffer
@@ -2643,13 +2739,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	struct virtio_device *vdev = vq->vq.vdev;
-	void *buf;
 	int err;
 
-	if (!vq->we_own_ring)
-		return -EPERM;
-
 	if (num > vq->vq.num_max)
 		return -E2BIG;
 
@@ -2659,31 +2750,101 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
 		return 0;
 
-	if (!vdev->config->disable_vq_and_reset)
-		return -ENOENT;
-
-	if (!vdev->config->enable_vq_after_reset)
-		return -ENOENT;
-
-	err = vdev->config->disable_vq_and_reset(_vq);
+	err = virtqueue_disable_and_recycle(_vq, recycle);
 	if (err)
 		return err;
 
-	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
-		recycle(_vq, buf);
-
 	if (vq->packed_ring)
 		err = virtqueue_resize_packed(_vq, num);
 	else
 		err = virtqueue_resize_split(_vq, num);
 
-	if (vdev->config->enable_vq_after_reset(_vq))
-		return -EBUSY;
-
-	return err;
+	return virtqueue_enable_after_reset(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_set_dma_premapped - set the vring premapped mode
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Enable the premapped mode of the vq.
+ *
+ * In premapped mode the vring does not do DMA mapping internally, so the
+ * driver must do the DMA mapping in advance and pass the DMA address through
+ * the dma_address field of the scatterlist. When the driver gets a used
+ * buffer back from the vring, it has to unmap that DMA address itself.
+ *
+ * This function must be called immediately after creating the vq, or after vq
+ * reset, and before adding any buffers to it.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EINVAL: the vring does not use the DMA API, so premapped mode cannot be enabled.
+ */
+int virtqueue_set_dma_premapped(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u32 num;
+
+	START_USE(vq);
+
+	num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
+
+	if (num != vq->vq.num_free) {
+		END_USE(vq);
+		return -EINVAL;
+	}
+
+	if (!vq->use_dma_api) {
+		END_USE(vq);
+		return -EINVAL;
+	}
+
+	vq->premapped = true;
+	vq->do_unmap = false;
+
+	END_USE(vq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
+
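For illustration only (not part of the patch): a minimal sketch of how a driver could use premapped mode together with the virtqueue_dma_* helpers introduced elsewhere in this patch, mirroring the virtio-net conversion in this series. The example_* names and the error handling are hypothetical.

/* Hypothetical example -- not part of the patch. */
#include <linux/virtio.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>

/* Call once right after the vq is created (or reset), before adding buffers. */
static int example_enable_premapped(struct virtqueue *vq)
{
	return virtqueue_set_dma_premapped(vq);
}

/* Map a receive buffer ourselves and post it with a pre-set DMA address. */
static int example_post_rx_buf(struct virtqueue *vq, void *buf, u32 len)
{
	struct scatterlist sg;
	dma_addr_t addr;

	addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_FROM_DEVICE, 0);
	if (virtqueue_dma_mapping_error(vq, addr))
		return -ENOMEM;

	sg_init_table(&sg, 1);
	sg.dma_address = addr;	/* the vring uses this instead of mapping buf */
	sg.length = len;

	return virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC);
}

/* On completion: sync for the CPU if needed, then unmap -- the core no longer
 * does either of these for us in premapped mode.
 */
static void example_finish_rx_buf(struct virtqueue *vq, dma_addr_t addr,
				  u32 used_len, u32 buf_len)
{
	if (virtqueue_dma_need_sync(vq, addr))
		virtqueue_dma_sync_single_range_for_cpu(vq, addr, 0, used_len,
							DMA_FROM_DEVICE);

	virtqueue_dma_unmap_single_attrs(vq, addr, buf_len, DMA_FROM_DEVICE,
					 DMA_ATTR_SKIP_CPU_SYNC);
}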
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	int err;
+
+	err = virtqueue_disable_and_recycle(_vq, recycle);
+	if (err)
+		return err;
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
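For illustration only (not part of the patch): a sketch of the recycle callback contract shared by virtqueue_reset() and virtqueue_resize(). The example_* names are hypothetical; a premapped driver would also unmap each returned buffer, as virtio-net does in virtnet_rq_unmap().

/* Hypothetical example -- not part of the patch. */
#include <linux/virtio.h>
#include <linux/mm.h>

/* Called by virtqueue_reset()/virtqueue_resize() for every buffer still
 * sitting in the ring; the driver owns the buffer again after this.
 */
static void example_recycle(struct virtqueue *vq, void *buf)
{
	/* A premapped driver would unmap buf's DMA address here as well. */
	put_page(virt_to_head_page(buf));
}

static int example_flush_and_restart(struct virtqueue *vq)
{
	int err;

	/* Resets the vq, hands unused buffers to example_recycle() and
	 * re-enables the vq before returning.
	 */
	err = virtqueue_reset(vq, example_recycle);
	if (err)
		return err;

	/* Refill the (now empty) ring before kicking the device. */
	return 0;
}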
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
@@ -2945,4 +3106,149 @@ const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
+/**
+ * virtqueue_dma_map_single_attrs - map DMA for _vq
+ * @_vq: the struct virtqueue we're talking about.
+ * @ptr: the pointer to the buffer to map
+ * @size: the size of the buffer to map
+ * @dir: DMA direction
+ * @attrs: DMA attrs
+ *
+ * The caller calls this to map a buffer in advance. The returned DMA address
+ * can then be passed to this _vq when it is in premapped mode.
+ *
+ * Returns the DMA address. Check it with virtqueue_dma_mapping_error().
+ */
+dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr,
+					  size_t size,
+					  enum dma_data_direction dir,
+					  unsigned long attrs)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!vq->use_dma_api)
+		return (dma_addr_t)virt_to_phys(ptr);
+
+	return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs);
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs);
+
+/**
+ * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: the dma address to unmap
+ * @size: the size of the buffer
+ * @dir: DMA direction
+ * @attrs: DMA attrs
+ *
+ * Unmap the address that was mapped by one of the virtqueue_dma_map_* APIs.
+ */
+void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr,
+				      size_t size, enum dma_data_direction dir,
+				      unsigned long attrs)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!vq->use_dma_api)
+		return;
+
+	dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs);
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs);
+
+/**
+ * virtqueue_dma_mapping_error - check dma address
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: DMA address
+ *
+ * Returns 0 if the DMA address is valid, a non-zero value otherwise.
+ */
+int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!vq->use_dma_api)
+		return 0;
+
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error);
+
+/**
+ * virtqueue_dma_need_sync - check whether a dma address needs sync
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: DMA address
+ *
+ * Check whether the dma address mapped by the virtqueue_dma_map_* APIs needs
+ * to be synchronized by the driver.
+ *
+ * Returns true if a sync is needed, false otherwise.
+ */
+bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!vq->use_dma_api)
+		return false;
+
+	return dma_need_sync(vring_dma_dev(vq), addr);
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync);
+
+/**
+ * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: buf size for sync
+ * @dir: DMA direction
+ *
+ * Before calling this function, use virtqueue_dma_need_sync() to confirm that
+ * the DMA address really needs to be synchronized.
+ */
+void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq,
+					     dma_addr_t addr,
+					     unsigned long offset, size_t size,
+					     enum dma_data_direction dir)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct device *dev = vring_dma_dev(vq);
+
+	if (!vq->use_dma_api)
+		return;
+
+	dma_sync_single_range_for_cpu(dev, addr, offset, size,
+				      DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu);
+
+/**
+ * virtqueue_dma_sync_single_range_for_device - dma sync for device
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: buf size for sync
+ * @dir: DMA direction
+ *
+ * Before calling this function, use virtqueue_dma_need_sync() to confirm that
+ * the DMA address really needs to be synchronized.
+ */
+void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq,
+						dma_addr_t addr,
+						unsigned long offset, size_t size,
+						enum dma_data_direction dir)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct device *dev = vring_dma_dev(vq);
+
+	if (!vq->use_dma_api)
+		return;
+
+	dma_sync_single_range_for_device(dev, addr, offset, size,
+					 DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device);
+
 MODULE_LICENSE("GPL");
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 961161da5900..06ce6d8c2e00 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -366,11 +366,14 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 	struct irq_affinity default_affd = { 0 };
 	struct cpumask *masks;
 	struct vdpa_callback cb;
+	bool has_affinity = desc && ops->set_vq_affinity;
 	int i, err, queue_idx = 0;
 
-	masks = create_affinity_masks(nvqs, desc ? desc : &default_affd);
-	if (!masks)
-		return -ENOMEM;
+	if (has_affinity) {
+		masks = create_affinity_masks(nvqs, desc ? desc : &default_affd);
+		if (!masks)
+			return -ENOMEM;
+	}
 
 	for (i = 0; i < nvqs; ++i) {
 		if (!names[i]) {
@@ -386,20 +389,22 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 			goto err_setup_vq;
 		}
 
-		if (ops->set_vq_affinity)
+		if (has_affinity)
 			ops->set_vq_affinity(vdpa, i, &masks[i]);
 	}
 
 	cb.callback = virtio_vdpa_config_cb;
 	cb.private = vd_dev;
 	ops->set_config_cb(vdpa, &cb);
-	kfree(masks);
+	if (has_affinity)
+		kfree(masks);
 
 	return 0;
 
 err_setup_vq:
 	virtio_vdpa_del_vqs(vdev);
-	kfree(masks);
+	if (has_affinity)
+		kfree(masks);
 	return err;
 }
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index db1b0eaef4eb..0e652026b776 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -208,6 +208,9 @@ struct vdpa_map_file {
  *				@vdev: vdpa device
  *				Returns the virtio features support by the
  *				device
+ * @get_backend_features:	Get parent-specific backend features (optional)
+ *				Returns the backend features supported by the
+ *				device.
* @set_driver_features: Set virtio features supported by the driver * @vdev: vdpa device * @features: feature support by the driver @@ -358,6 +361,7 @@ struct vdpa_config_ops { u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); + u64 (*get_backend_features)(const struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); u64 (*get_driver_features)(struct vdpa_device *vdev); void (*set_config_cb)(struct vdpa_device *vdev, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6041deee37..1311a7fbe675 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,6 +9,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -61,6 +62,8 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +struct device *virtqueue_dma_dev(struct virtqueue *vq); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); @@ -78,6 +81,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); +int virtqueue_set_dma_premapped(struct virtqueue *_vq); + bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); @@ -95,6 +100,8 @@ dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); +int virtqueue_reset(struct virtqueue *vq, + void (*recycle)(struct virtqueue *vq, void *buf)); /** * struct virtio_device - representation of a device using virtio @@ -206,4 +213,19 @@ void unregister_virtio_driver(struct virtio_driver *drv); #define module_virtio_driver(__virtio_driver) \ module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); + +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index d3aad12ad1fa..2d827d22cd99 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -181,5 +181,9 @@ struct vhost_vdpa_iova_range { #define VHOST_BACKEND_F_SUSPEND 0x4 /* Device can be resumed */ #define VHOST_BACKEND_F_RESUME 0x5 +/* Device supports the driver enabling virtqueues both before and after + * DRIVER_OK + */ +#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 #endif
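For illustration only (not part of the patch): a user-space sketch of negotiating the new backend feature bit over vhost-vdpa. VHOST_GET_BACKEND_FEATURES and VHOST_SET_BACKEND_FEATURES come from <linux/vhost.h>; the device path and the program structure are hypothetical.

/* Hypothetical example -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>
#include <linux/vhost_types.h>

int main(void)
{
	uint64_t features;
	int fd = open("/dev/vhost-vdpa-0", O_RDWR);	/* hypothetical node */

	if (fd < 0)
		return 1;

	/* Ask the kernel which backend features this device offers. */
	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features) < 0)
		goto err;

	if (features & (1ULL << VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))
		printf("virtqueues may be enabled after DRIVER_OK\n");

	/* Ack only the bits this client understands. */
	features &= (1ULL << VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
	if (ioctl(fd, VHOST_SET_BACKEND_FEATURES, &features) < 0)
		goto err;

	close(fd);
	return 0;
err:
	close(fd);
	return 1;
}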