vduse: Support registering userspace memory for IOVA regions
Introduce two ioctls, VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM, to support registering and de-registering userspace memory for IOVA regions. For now, this only supports registering userspace memory for the bounce buffer region in the virtio-vdpa case. Signed-off-by: Xie Yongji <xieyongji@bytedance.com> Acked-by: Jason Wang <jasowang@redhat.com> Message-Id: <20220803045523.23851-5-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
6c77ed2288
commit
79a463be9e
@ -21,6 +21,8 @@
|
|||||||
#include <linux/uio.h>
|
#include <linux/uio.h>
|
||||||
#include <linux/vdpa.h>
|
#include <linux/vdpa.h>
|
||||||
#include <linux/nospec.h>
|
#include <linux/nospec.h>
|
||||||
|
#include <linux/vmalloc.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
#include <uapi/linux/vduse.h>
|
#include <uapi/linux/vduse.h>
|
||||||
#include <uapi/linux/vdpa.h>
|
#include <uapi/linux/vdpa.h>
|
||||||
#include <uapi/linux/virtio_config.h>
|
#include <uapi/linux/virtio_config.h>
|
||||||
@ -64,6 +66,13 @@ struct vduse_vdpa {
|
|||||||
struct vduse_dev *dev;
|
struct vduse_dev *dev;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Book-keeping for the userspace memory registered on a VDUSE device. */
struct vduse_umem {
	/* Start of the IOVA region the memory was registered for. */
	unsigned long iova;
	/* Number of entries in @pages. */
	unsigned long npages;
	/* Array of the pinned userspace pages. */
	struct page **pages;
	/* Address space whose pinned_vm counter accounts for the pages. */
	struct mm_struct *mm;
};
|
||||||
|
|
||||||
struct vduse_dev {
|
struct vduse_dev {
|
||||||
struct vduse_vdpa *vdev;
|
struct vduse_vdpa *vdev;
|
||||||
struct device *dev;
|
struct device *dev;
|
||||||
@ -95,6 +104,8 @@ struct vduse_dev {
|
|||||||
u8 status;
|
u8 status;
|
||||||
u32 vq_num;
|
u32 vq_num;
|
||||||
u32 vq_align;
|
u32 vq_align;
|
||||||
|
struct vduse_umem *umem;
|
||||||
|
struct mutex mem_lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vduse_dev_msg {
|
struct vduse_dev_msg {
|
||||||
@ -917,6 +928,102 @@ unlock:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Drop the userspace memory registration of @dev.
 *
 * The request must describe exactly the registered region: @iova has to
 * match the recorded start and @size has to equal the bounce size of the
 * device's IOVA domain.
 *
 * Returns 0 on success, -ENOENT when no memory is registered and -EINVAL
 * when @iova/@size do not match the registration.
 */
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	struct vduse_umem *umem;
	int ret = -ENOENT;

	mutex_lock(&dev->mem_lock);

	umem = dev->umem;
	if (!umem)
		goto unlock;

	if (umem->iova != iova || size != dev->domain->bounce_size) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Detach the pages from the domain before releasing the pins. */
	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(umem->pages, umem->npages, true);

	/* Undo the pinned_vm accounting and the mm reference taken at registration. */
	atomic64_sub(umem->npages, &umem->mm->pinned_vm);
	mmdrop(umem->mm);

	vfree(umem->pages);
	kfree(umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}
|
||||||
|
|
||||||
|
/*
 * Pin the userspace buffer starting at @uaddr and hand its pages to the
 * IOVA domain of @dev as user bounce pages.
 *
 * The request is only accepted when the domain has a bounce map, @size
 * equals the domain's bounce size, @iova is 0 and @uaddr is page aligned.
 * At most one registration can exist per device at a time.
 *
 * Returns 0 on success, -EINVAL for unacceptable arguments, -EEXIST when
 * memory is already registered, -ENOMEM when an allocation fails, when the
 * RLIMIT_MEMLOCK limit would be exceeded or when fewer pages than requested
 * could be pinned, or the negative error reported by pin_user_pages() or
 * vduse_domain_add_user_bounce_pages().
 */
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	/* From here on -ENOMEM is the default failure code. */
	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	/* Refuse to pin more pages than RLIMIT_MEMLOCK allows for this mm. */
	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		/* A partial pin is treated as failure; the pages are released below. */
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	/* Keep the mm_struct alive so pinned_vm can be adjusted on de-registration. */
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}
|
||||||
|
|
||||||
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||||
unsigned long arg)
|
unsigned long arg)
|
||||||
{
|
{
|
||||||
@ -1089,6 +1196,38 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
|||||||
ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
|
ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case VDUSE_IOTLB_REG_UMEM: {
|
||||||
|
struct vduse_iova_umem umem;
|
||||||
|
|
||||||
|
ret = -EFAULT;
|
||||||
|
if (copy_from_user(&umem, argp, sizeof(umem)))
|
||||||
|
break;
|
||||||
|
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (!is_mem_zero((const char *)umem.reserved,
|
||||||
|
sizeof(umem.reserved)))
|
||||||
|
break;
|
||||||
|
|
||||||
|
ret = vduse_dev_reg_umem(dev, umem.iova,
|
||||||
|
umem.uaddr, umem.size);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case VDUSE_IOTLB_DEREG_UMEM: {
|
||||||
|
struct vduse_iova_umem umem;
|
||||||
|
|
||||||
|
ret = -EFAULT;
|
||||||
|
if (copy_from_user(&umem, argp, sizeof(umem)))
|
||||||
|
break;
|
||||||
|
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (!is_mem_zero((const char *)umem.reserved,
|
||||||
|
sizeof(umem.reserved)))
|
||||||
|
break;
|
||||||
|
|
||||||
|
ret = vduse_dev_dereg_umem(dev, umem.iova,
|
||||||
|
umem.size);
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
ret = -ENOIOCTLCMD;
|
ret = -ENOIOCTLCMD;
|
||||||
break;
|
break;
|
||||||
@ -1101,6 +1240,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
|
|||||||
{
|
{
|
||||||
struct vduse_dev *dev = file->private_data;
|
struct vduse_dev *dev = file->private_data;
|
||||||
|
|
||||||
|
vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
|
||||||
spin_lock(&dev->msg_lock);
|
spin_lock(&dev->msg_lock);
|
||||||
/* Make sure the inflight messages can be processed after reconnection */
|
/* Make sure the inflight messages can be processed after reconnection */
|
||||||
list_splice_init(&dev->recv_list, &dev->send_list);
|
list_splice_init(&dev->recv_list, &dev->send_list);
|
||||||
@ -1163,6 +1303,7 @@ static struct vduse_dev *vduse_dev_create(void)
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
mutex_init(&dev->lock);
|
mutex_init(&dev->lock);
|
||||||
|
mutex_init(&dev->mem_lock);
|
||||||
spin_lock_init(&dev->msg_lock);
|
spin_lock_init(&dev->msg_lock);
|
||||||
INIT_LIST_HEAD(&dev->send_list);
|
INIT_LIST_HEAD(&dev->send_list);
|
||||||
INIT_LIST_HEAD(&dev->recv_list);
|
INIT_LIST_HEAD(&dev->recv_list);
|
||||||
|
@ -210,6 +210,29 @@ struct vduse_vq_eventfd {
|
|||||||
*/
|
*/
|
||||||
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
|
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct vduse_iova_umem - userspace memory configuration for one IOVA region
|
||||||
|
* @uaddr: start address of userspace memory, it must be aligned to page size
|
||||||
|
* @iova: start of the IOVA region
|
||||||
|
* @size: size of the IOVA region
|
||||||
|
* @reserved: for future use, needs to be initialized to zero
|
||||||
|
*
|
||||||
|
* Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
|
||||||
|
* ioctls to register/de-register userspace memory for IOVA regions
|
||||||
|
*/
|
||||||
|
struct vduse_iova_umem {
|
||||||
|
__u64 uaddr;
|
||||||
|
__u64 iova;
|
||||||
|
__u64 size;
|
||||||
|
__u64 reserved[3];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Register userspace memory for IOVA regions */
|
||||||
|
#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
|
||||||
|
|
||||||
|
/* De-register the userspace memory. Caller should set the iova and size fields. */
|
||||||
|
#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
|
||||||
|
|
||||||
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
|
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user