virtio,vdpa,vhost: features, fixes
vduse driver supporting blk
virtio-vsock support for end of record with SEQPACKET
vdpa: mac and mq support for ifcvf and mlx5
vdpa: management netlink for ifcvf
virtio-i2c, gpio dt bindings
misc fixes, cleanups
NB: when merging this with
b542e383d8
("eventfd: Make signal recursion protection a task bit")
from Linus' tree, replace eventfd_signal_count with
eventfd_signal_allowed, and drop the export of eventfd_wake_count from
("eventfd: Export eventfd_wake_count to modules").
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
-----BEGIN PGP SIGNATURE-----
iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmE1+awPHG1zdEByZWRo
YXQuY29tAAoJECgfDbjSjVRpt6EIAJy0qrc62lktNA0IiIVJSLbUbTMmFj8MzkGR
8UxZdhpjWqBPJPyaOuNeksAqTGm/UAPEYx3C2c95Jhej7anFpy7dbCtIXcPHLJME
DjcJg+EDrlNCj8m0FcsHpHWsFzPMERJpyEZNxgB5WazQbv+yWhGrg2FN5DCnF0Ro
ZFYeKSVty148pQ0nHl8X0JM2XMtqit+O+LvKN2HQZ+fubh7BCzMxzkHY0QLHIzUS
UeZqd3Qm8YcbqnlX38P5D6k+NPiTEgknmxaBLkPxg6H3XxDAmaIRFb8Ldd1rsgy1
zTLGDiSGpVDIpawRnuEAzqJThV3Y5/MVJ1WD+mDYQ96tmhfp+KY=
=DBH/
-----END PGP SIGNATURE-----
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- vduse driver ("vDPA Device in Userspace") supporting emulated virtio
block devices
- virtio-vsock support for end of record with SEQPACKET
- vdpa: mac and mq support for ifcvf and mlx5
- vdpa: management netlink for ifcvf
- virtio-i2c, gpio dt bindings
- misc fixes and cleanups
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (39 commits)
Documentation: Add documentation for VDUSE
vduse: Introduce VDUSE - vDPA Device in Userspace
vduse: Implement an MMU-based software IOTLB
vdpa: Support transferring virtual addressing during DMA mapping
vdpa: factor out vhost_vdpa_pa_map() and vhost_vdpa_pa_unmap()
vdpa: Add an opaque pointer for vdpa_config_ops.dma_map()
vhost-iotlb: Add an opaque pointer for vhost IOTLB
vhost-vdpa: Handle the failure of vdpa_reset()
vdpa: Add reset callback in vdpa_config_ops
vdpa: Fix some coding style issues
file: Export receive_fd() to modules
eventfd: Export eventfd_wake_count to modules
iova: Export alloc_iova_fast() and free_iova_fast()
virtio-blk: remove unneeded "likely" statements
virtio-balloon: Use virtio_find_vqs() helper
vdpa: Make use of PFN_PHYS/PFN_UP/PFN_DOWN helper macro
vsock_test: update message bounds test for MSG_EOR
af_vsock: rename variables in receive loop
virtio/vsock: support MSG_EOR bit processing
vhost/vsock: support MSG_EOR bit processing
...
This commit is contained in:
commit
78e709522d
59
Documentation/devicetree/bindings/gpio/gpio-virtio.yaml
Normal file
59
Documentation/devicetree/bindings/gpio/gpio-virtio.yaml
Normal file
@ -0,0 +1,59 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/gpio/gpio-virtio.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Virtio GPIO controller
|
||||
|
||||
maintainers:
|
||||
- Viresh Kumar <viresh.kumar@linaro.org>
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/virtio/virtio-device.yaml#
|
||||
|
||||
description:
|
||||
Virtio GPIO controller, see /schemas/virtio/virtio-device.yaml for more
|
||||
details.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: gpio
|
||||
|
||||
compatible:
|
||||
const: virtio,device29
|
||||
|
||||
gpio-controller: true
|
||||
|
||||
"#gpio-cells":
|
||||
const: 2
|
||||
|
||||
interrupt-controller: true
|
||||
|
||||
"#interrupt-cells":
|
||||
const: 2
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- gpio-controller
|
||||
- "#gpio-cells"
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
virtio@3000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x3000 0x100>;
|
||||
interrupts = <41>;
|
||||
|
||||
gpio {
|
||||
compatible = "virtio,device29";
|
||||
gpio-controller;
|
||||
#gpio-cells = <2>;
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <2>;
|
||||
};
|
||||
};
|
||||
|
||||
...
|
51
Documentation/devicetree/bindings/i2c/i2c-virtio.yaml
Normal file
51
Documentation/devicetree/bindings/i2c/i2c-virtio.yaml
Normal file
@ -0,0 +1,51 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/i2c/i2c-virtio.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Virtio I2C Adapter
|
||||
|
||||
maintainers:
|
||||
- Viresh Kumar <viresh.kumar@linaro.org>
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/i2c/i2c-controller.yaml#
|
||||
- $ref: /schemas/virtio/virtio-device.yaml#
|
||||
|
||||
description:
|
||||
Virtio I2C device, see /schemas/virtio/virtio-device.yaml for more details.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: i2c
|
||||
|
||||
compatible:
|
||||
const: virtio,device22
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
virtio@3000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x3000 0x100>;
|
||||
interrupts = <41>;
|
||||
|
||||
i2c {
|
||||
compatible = "virtio,device22";
|
||||
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
light-sensor@20 {
|
||||
compatible = "dynaimage,al3320a";
|
||||
reg = <0x20>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
...
|
@ -36,7 +36,8 @@ required:
|
||||
- reg
|
||||
- interrupts
|
||||
|
||||
additionalProperties: false
|
||||
additionalProperties:
|
||||
type: object
|
||||
|
||||
examples:
|
||||
- |
|
||||
|
41
Documentation/devicetree/bindings/virtio/virtio-device.yaml
Normal file
41
Documentation/devicetree/bindings/virtio/virtio-device.yaml
Normal file
@ -0,0 +1,41 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/virtio/virtio-device.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Virtio device bindings
|
||||
|
||||
maintainers:
|
||||
- Viresh Kumar <viresh.kumar@linaro.org>
|
||||
|
||||
description:
|
||||
These bindings are applicable to virtio devices irrespective of the bus they
|
||||
are bound to, like mmio or pci.
|
||||
|
||||
# We need a select here so we don't match all nodes with 'virtio,mmio'
|
||||
properties:
|
||||
compatible:
|
||||
pattern: "^virtio,device[0-9a-f]{1,8}$"
|
||||
description: Virtio device nodes.
|
||||
"virtio,deviceID", where ID is the virtio device id. The textual
|
||||
representation of ID shall be in lower case hexadecimal with leading
|
||||
zeroes suppressed.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
examples:
|
||||
- |
|
||||
virtio@3000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x3000 0x100>;
|
||||
interrupts = <43>;
|
||||
|
||||
i2c {
|
||||
compatible = "virtio,device22";
|
||||
};
|
||||
};
|
||||
...
|
@ -27,6 +27,7 @@ place where this information is gathered.
|
||||
iommu
|
||||
media/index
|
||||
sysfs-platform_profile
|
||||
vduse
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
@ -299,6 +299,7 @@ Code Seq# Include File Comments
|
||||
'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict!
|
||||
'|' 00-7F linux/media.h
|
||||
0x80 00-1F linux/fb.h
|
||||
0x81 00-1F linux/vduse.h
|
||||
0x89 00-06 arch/x86/include/asm/sockios.h
|
||||
0x89 0B-DF linux/sockios.h
|
||||
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
|
||||
|
233
Documentation/userspace-api/vduse.rst
Normal file
233
Documentation/userspace-api/vduse.rst
Normal file
@ -0,0 +1,233 @@
|
||||
==================================
|
||||
VDUSE - "vDPA Device in Userspace"
|
||||
==================================
|
||||
|
||||
vDPA (virtio data path acceleration) device is a device that uses a
|
||||
datapath which complies with the virtio specifications with vendor
|
||||
specific control path. vDPA devices can be both physically located on
|
||||
the hardware or emulated by software. VDUSE is a framework that makes it
|
||||
possible to implement software-emulated vDPA devices in userspace. And
|
||||
to make the device emulation more secure, the emulated vDPA device's
|
||||
control path is handled in the kernel and only the data path is
|
||||
implemented in the userspace.
|
||||
|
||||
Note that only the virtio block device is supported by the VDUSE framework now,
|
||||
which can reduce security risks when the userspace process that implements
|
||||
the data path is run by an unprivileged user. The support for other device
|
||||
types can be added after the security issue of corresponding device driver
|
||||
is clarified or fixed in the future.
|
||||
|
||||
Create/Destroy VDUSE devices
|
||||
----------------------------
|
||||
|
||||
VDUSE devices are created as follows:
|
||||
|
||||
1. Create a new VDUSE instance with ioctl(VDUSE_CREATE_DEV) on
|
||||
/dev/vduse/control.
|
||||
|
||||
2. Set up each virtqueue with ioctl(VDUSE_VQ_SETUP) on /dev/vduse/$NAME.
|
||||
|
||||
3. Begin processing VDUSE messages from /dev/vduse/$NAME. The first
|
||||
messages will arrive while attaching the VDUSE instance to vDPA bus.
|
||||
|
||||
4. Send the VDPA_CMD_DEV_NEW netlink message to attach the VDUSE
|
||||
instance to vDPA bus.
|
||||
|
||||
VDUSE devices are destroyed as follows:
|
||||
|
||||
1. Send the VDPA_CMD_DEV_DEL netlink message to detach the VDUSE
|
||||
instance from vDPA bus.
|
||||
|
||||
2. Close the file descriptor referring to /dev/vduse/$NAME.
|
||||
|
||||
3. Destroy the VDUSE instance with ioctl(VDUSE_DESTROY_DEV) on
|
||||
/dev/vduse/control.
|
||||
|
||||
The netlink messages can be sent via the vdpa tool in iproute2 or with the
|
||||
below sample codes:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
static int netlink_add_vduse(const char *name, enum vdpa_command cmd)
|
||||
{
|
||||
struct nl_sock *nlsock;
|
||||
struct nl_msg *msg;
|
||||
int famid;
|
||||
|
||||
nlsock = nl_socket_alloc();
|
||||
if (!nlsock)
|
||||
return -ENOMEM;
|
||||
|
||||
if (genl_connect(nlsock))
|
||||
goto free_sock;
|
||||
|
||||
famid = genl_ctrl_resolve(nlsock, VDPA_GENL_NAME);
|
||||
if (famid < 0)
|
||||
goto close_sock;
|
||||
|
||||
msg = nlmsg_alloc();
|
||||
if (!msg)
|
||||
goto close_sock;
|
||||
|
||||
if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, famid, 0, 0, cmd, 0))
|
||||
goto nla_put_failure;
|
||||
|
||||
NLA_PUT_STRING(msg, VDPA_ATTR_DEV_NAME, name);
|
||||
if (cmd == VDPA_CMD_DEV_NEW)
|
||||
NLA_PUT_STRING(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, "vduse");
|
||||
|
||||
if (nl_send_sync(nlsock, msg))
|
||||
goto close_sock;
|
||||
|
||||
nl_close(nlsock);
|
||||
nl_socket_free(nlsock);
|
||||
|
||||
return 0;
|
||||
nla_put_failure:
|
||||
nlmsg_free(msg);
|
||||
close_sock:
|
||||
nl_close(nlsock);
|
||||
free_sock:
|
||||
nl_socket_free(nlsock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
How VDUSE works
|
||||
---------------
|
||||
|
||||
As mentioned above, a VDUSE device is created by ioctl(VDUSE_CREATE_DEV) on
|
||||
/dev/vduse/control. With this ioctl, userspace can specify some basic configuration
|
||||
such as device name (uniquely identify a VDUSE device), virtio features, virtio
|
||||
configuration space, the number of virtqueues and so on for this emulated device.
|
||||
Then a char device interface (/dev/vduse/$NAME) is exported to userspace for device
|
||||
emulation. Userspace can use the VDUSE_VQ_SETUP ioctl on /dev/vduse/$NAME to
|
||||
add per-virtqueue configuration such as the max size of virtqueue to the device.
|
||||
|
||||
After the initialization, the VDUSE device can be attached to vDPA bus via
|
||||
the VDPA_CMD_DEV_NEW netlink message. Userspace needs to read()/write() on
|
||||
/dev/vduse/$NAME to receive/reply some control messages from/to VDUSE kernel
|
||||
module as follows:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
static int vduse_message_handler(int dev_fd)
|
||||
{
|
||||
int len;
|
||||
struct vduse_dev_request req;
|
||||
struct vduse_dev_response resp;
|
||||
|
||||
len = read(dev_fd, &req, sizeof(req));
|
||||
if (len != sizeof(req))
|
||||
return -1;
|
||||
|
||||
resp.request_id = req.request_id;
|
||||
|
||||
switch (req.type) {
|
||||
|
||||
/* handle different types of messages */
|
||||
|
||||
}
|
||||
|
||||
len = write(dev_fd, &resp, sizeof(resp));
|
||||
if (len != sizeof(resp))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
There are now three types of messages introduced by VDUSE framework:
|
||||
|
||||
- VDUSE_GET_VQ_STATE: Get the state for virtqueue, userspace should return
|
||||
avail index for split virtqueue or the device/driver ring wrap counters and
|
||||
the avail and used index for packed virtqueue.
|
||||
|
||||
- VDUSE_SET_STATUS: Set the device status, userspace should follow
|
||||
the virtio spec: https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html
|
||||
to process this message. For example, fail to set the FEATURES_OK device
|
||||
status bit if the device can not accept the negotiated virtio features
|
||||
obtained from the VDUSE_DEV_GET_FEATURES ioctl.
|
||||
|
||||
- VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for specified
|
||||
IOVA range, userspace should first remove the old mapping, then set up the new
|
||||
mapping via the VDUSE_IOTLB_GET_FD ioctl.
|
||||
|
||||
After DRIVER_OK status bit is set via the VDUSE_SET_STATUS message, userspace is
|
||||
able to start the dataplane processing as follows:
|
||||
|
||||
1. Get the specified virtqueue's information with the VDUSE_VQ_GET_INFO ioctl,
|
||||
including the size, the IOVAs of descriptor table, available ring and used ring,
|
||||
the state and the ready status.
|
||||
|
||||
2. Pass the above IOVAs to the VDUSE_IOTLB_GET_FD ioctl so that those IOVA regions
|
||||
can be mapped into userspace. Some sample code is shown below:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
static int perm_to_prot(uint8_t perm)
|
||||
{
|
||||
int prot = 0;
|
||||
|
||||
switch (perm) {
|
||||
case VDUSE_ACCESS_WO:
|
||||
prot |= PROT_WRITE;
|
||||
break;
|
||||
case VDUSE_ACCESS_RO:
|
||||
prot |= PROT_READ;
|
||||
break;
|
||||
case VDUSE_ACCESS_RW:
|
||||
prot |= PROT_READ | PROT_WRITE;
|
||||
break;
|
||||
}
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
static void *iova_to_va(int dev_fd, uint64_t iova, uint64_t *len)
|
||||
{
|
||||
int fd;
|
||||
void *addr;
|
||||
size_t size;
|
||||
struct vduse_iotlb_entry entry;
|
||||
|
||||
entry.start = iova;
|
||||
entry.last = iova;
|
||||
|
||||
/*
|
||||
* Find the first IOVA region that overlaps with the specified
|
||||
* range [start, last] and return the corresponding file descriptor.
|
||||
*/
|
||||
fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry);
|
||||
if (fd < 0)
|
||||
return NULL;
|
||||
|
||||
size = entry.last - entry.start + 1;
|
||||
*len = entry.last - iova + 1;
|
||||
addr = mmap(0, size, perm_to_prot(entry.perm), MAP_SHARED,
|
||||
fd, entry.offset);
|
||||
close(fd);
|
||||
if (addr == MAP_FAILED)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Use a data structure such as a linked list to store
|
||||
* the iotlb mapping. The munmap(2) should be called for the
|
||||
* cached mapping when the corresponding VDUSE_UPDATE_IOTLB
|
||||
* message is received or the device is reset.
|
||||
*/
|
||||
|
||||
return addr + iova - entry.start;
|
||||
}
|
||||
|
||||
3. Set up the kick eventfd for the specified virtqueues with the VDUSE_VQ_SETUP_KICKFD
|
||||
ioctl. The kick eventfd is used by VDUSE kernel module to notify userspace to
|
||||
consume the available ring. This is optional since userspace can choose to poll the
|
||||
available ring instead.
|
||||
|
||||
4. Listen to the kick eventfd (optional) and consume the available ring. The buffer
|
||||
described by the descriptors in the descriptor table should also be mapped into
|
||||
userspace via the VDUSE_IOTLB_GET_FD ioctl before accessing.
|
||||
|
||||
5. Inject an interrupt for specific virtqueue with the VDUSE_INJECT_VQ_IRQ ioctl
|
||||
after the used ring is filled.
|
||||
|
||||
For more details on the uAPI, please see include/uapi/linux/vduse.h.
|
@ -762,7 +762,7 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
goto out_free_vblk;
|
||||
|
||||
/* Default queue sizing is to fill the ring. */
|
||||
if (likely(!virtblk_queue_depth)) {
|
||||
if (!virtblk_queue_depth) {
|
||||
queue_depth = vblk->vqs[0].vq->num_free;
|
||||
/* ... but without indirect descs, we use 2 descs per req */
|
||||
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
|
||||
@ -836,7 +836,7 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
else
|
||||
blk_size = queue_logical_block_size(q);
|
||||
|
||||
if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) {
|
||||
if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) {
|
||||
dev_err(&vdev->dev,
|
||||
"block size is changed unexpectedly, now is %u\n",
|
||||
blk_size);
|
||||
|
@ -519,6 +519,7 @@ retry:
|
||||
|
||||
return new_iova->pfn_lo;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alloc_iova_fast);
|
||||
|
||||
/**
|
||||
* free_iova_fast - free iova pfn range into rcache
|
||||
@ -536,6 +537,7 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
|
||||
|
||||
free_iova(iovad, pfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(free_iova_fast);
|
||||
|
||||
#define fq_ring_for_each(i, fq) \
|
||||
for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
|
||||
|
@ -33,6 +33,16 @@ config VDPA_SIM_BLOCK
|
||||
vDPA block device simulator which terminates IO request in a
|
||||
memory buffer.
|
||||
|
||||
config VDPA_USER
|
||||
tristate "VDUSE (vDPA Device in Userspace) support"
|
||||
depends on EVENTFD && MMU && HAS_DMA
|
||||
select DMA_OPS
|
||||
select VHOST_IOTLB
|
||||
select IOMMU_IOVA
|
||||
help
|
||||
With VDUSE it is possible to emulate a vDPA Device
|
||||
in a userspace program.
|
||||
|
||||
config IFCVF
|
||||
tristate "Intel IFC VF vDPA driver"
|
||||
depends on PCI_MSI
|
||||
@ -53,6 +63,7 @@ config MLX5_VDPA
|
||||
config MLX5_VDPA_NET
|
||||
tristate "vDPA driver for ConnectX devices"
|
||||
select MLX5_VDPA
|
||||
select VHOST_RING
|
||||
depends on MLX5_CORE
|
||||
help
|
||||
VDPA network driver for ConnectX6 and newer. Provides offloading
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_VDPA) += vdpa.o
|
||||
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
|
||||
obj-$(CONFIG_VDPA_USER) += vdpa_user/
|
||||
obj-$(CONFIG_IFCVF) += ifcvf/
|
||||
obj-$(CONFIG_MLX5_VDPA) += mlx5/
|
||||
obj-$(CONFIG_VP_VDPA) += virtio_pci/
|
||||
|
@ -158,7 +158,9 @@ next:
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
|
||||
hw->nr_vring = ifc_ioread16(&hw->common_cfg->num_queues);
|
||||
|
||||
for (i = 0; i < hw->nr_vring; i++) {
|
||||
ifc_iowrite16(i, &hw->common_cfg->queue_select);
|
||||
notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
|
||||
hw->vring[i].notify_addr = hw->notify_base +
|
||||
@ -304,7 +306,7 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
|
||||
u32 q_pair_id;
|
||||
|
||||
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
|
||||
q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
|
||||
q_pair_id = qid / hw->nr_vring;
|
||||
avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
|
||||
last_avail_idx = ifc_ioread16(avail_idx_addr);
|
||||
|
||||
@ -318,7 +320,7 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num)
|
||||
u32 q_pair_id;
|
||||
|
||||
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
|
||||
q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
|
||||
q_pair_id = qid / hw->nr_vring;
|
||||
avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
|
||||
hw->vring[qid].last_avail_idx = num;
|
||||
ifc_iowrite16(num, avail_idx_addr);
|
||||
|
@ -22,17 +22,8 @@
|
||||
#define N3000_DEVICE_ID 0x1041
|
||||
#define N3000_SUBSYS_DEVICE_ID 0x001A
|
||||
|
||||
#define IFCVF_NET_SUPPORTED_FEATURES \
|
||||
((1ULL << VIRTIO_NET_F_MAC) | \
|
||||
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
|
||||
(1ULL << VIRTIO_F_VERSION_1) | \
|
||||
(1ULL << VIRTIO_NET_F_STATUS) | \
|
||||
(1ULL << VIRTIO_F_ORDER_PLATFORM) | \
|
||||
(1ULL << VIRTIO_F_ACCESS_PLATFORM) | \
|
||||
(1ULL << VIRTIO_NET_F_MRG_RXBUF))
|
||||
|
||||
/* Only one queue pair for now. */
|
||||
#define IFCVF_MAX_QUEUE_PAIRS 1
|
||||
/* Max 8 data queue pairs(16 queues) and one control vq for now. */
|
||||
#define IFCVF_MAX_QUEUES 17
|
||||
|
||||
#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE
|
||||
#define IFCVF_QUEUE_MAX 32768
|
||||
@ -51,8 +42,6 @@
|
||||
#define ifcvf_private_to_vf(adapter) \
|
||||
(&((struct ifcvf_adapter *)adapter)->vf)
|
||||
|
||||
#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)
|
||||
|
||||
struct vring_info {
|
||||
u64 desc;
|
||||
u64 avail;
|
||||
@ -83,7 +72,7 @@ struct ifcvf_hw {
|
||||
u32 dev_type;
|
||||
struct virtio_pci_common_cfg __iomem *common_cfg;
|
||||
void __iomem *net_cfg;
|
||||
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
|
||||
struct vring_info vring[IFCVF_MAX_QUEUES];
|
||||
void __iomem * const *base;
|
||||
char config_msix_name[256];
|
||||
struct vdpa_callback config_cb;
|
||||
@ -103,7 +92,13 @@ struct ifcvf_vring_lm_cfg {
|
||||
|
||||
struct ifcvf_lm_cfg {
|
||||
u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
|
||||
struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS];
|
||||
struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUES];
|
||||
};
|
||||
|
||||
struct ifcvf_vdpa_mgmt_dev {
|
||||
struct vdpa_mgmt_dev mdev;
|
||||
struct ifcvf_adapter *adapter;
|
||||
struct pci_dev *pdev;
|
||||
};
|
||||
|
||||
int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
|
||||
|
@ -63,9 +63,13 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
|
||||
struct pci_dev *pdev = adapter->pdev;
|
||||
struct ifcvf_hw *vf = &adapter->vf;
|
||||
int vector, i, ret, irq;
|
||||
u16 max_intr;
|
||||
|
||||
ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
|
||||
IFCVF_MAX_INTR, PCI_IRQ_MSIX);
|
||||
/* all queues and config interrupt */
|
||||
max_intr = vf->nr_vring + 1;
|
||||
|
||||
ret = pci_alloc_irq_vectors(pdev, max_intr,
|
||||
max_intr, PCI_IRQ_MSIX);
|
||||
if (ret < 0) {
|
||||
IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n");
|
||||
return ret;
|
||||
@ -83,7 +87,7 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
|
||||
for (i = 0; i < vf->nr_vring; i++) {
|
||||
snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
|
||||
pci_name(pdev), i);
|
||||
vector = i + IFCVF_MSI_QUEUE_OFF;
|
||||
@ -112,7 +116,6 @@ static int ifcvf_start_datapath(void *private)
|
||||
u8 status;
|
||||
int ret;
|
||||
|
||||
vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2;
|
||||
ret = ifcvf_start_hw(vf);
|
||||
if (ret < 0) {
|
||||
status = ifcvf_get_status(vf);
|
||||
@ -128,7 +131,7 @@ static int ifcvf_stop_datapath(void *private)
|
||||
struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
|
||||
for (i = 0; i < vf->nr_vring; i++)
|
||||
vf->vring[i].cb.callback = NULL;
|
||||
|
||||
ifcvf_stop_hw(vf);
|
||||
@ -141,7 +144,7 @@ static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
|
||||
struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
|
||||
for (i = 0; i < vf->nr_vring; i++) {
|
||||
vf->vring[i].last_avail_idx = 0;
|
||||
vf->vring[i].desc = 0;
|
||||
vf->vring[i].avail = 0;
|
||||
@ -171,17 +174,12 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
|
||||
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
struct pci_dev *pdev = adapter->pdev;
|
||||
|
||||
u32 type = vf->dev_type;
|
||||
u64 features;
|
||||
|
||||
switch (vf->dev_type) {
|
||||
case VIRTIO_ID_NET:
|
||||
features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES;
|
||||
break;
|
||||
case VIRTIO_ID_BLOCK:
|
||||
if (type == VIRTIO_ID_NET || type == VIRTIO_ID_BLOCK)
|
||||
features = ifcvf_get_features(vf);
|
||||
break;
|
||||
default:
|
||||
else {
|
||||
features = 0;
|
||||
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
|
||||
}
|
||||
@ -218,23 +216,12 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
|
||||
int ret;
|
||||
|
||||
vf = vdpa_to_vf(vdpa_dev);
|
||||
adapter = dev_get_drvdata(vdpa_dev->dev.parent);
|
||||
adapter = vdpa_to_adapter(vdpa_dev);
|
||||
status_old = ifcvf_get_status(vf);
|
||||
|
||||
if (status_old == status)
|
||||
return;
|
||||
|
||||
if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) &&
|
||||
!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
|
||||
ifcvf_stop_datapath(adapter);
|
||||
ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2);
|
||||
}
|
||||
|
||||
if (status == 0) {
|
||||
ifcvf_reset_vring(adapter);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
|
||||
!(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
|
||||
ret = ifcvf_request_irq(adapter);
|
||||
@ -254,6 +241,29 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
|
||||
ifcvf_set_status(vf, status);
|
||||
}
|
||||
|
||||
static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
struct ifcvf_adapter *adapter;
|
||||
struct ifcvf_hw *vf;
|
||||
u8 status_old;
|
||||
|
||||
vf = vdpa_to_vf(vdpa_dev);
|
||||
adapter = vdpa_to_adapter(vdpa_dev);
|
||||
status_old = ifcvf_get_status(vf);
|
||||
|
||||
if (status_old == 0)
|
||||
return 0;
|
||||
|
||||
if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) {
|
||||
ifcvf_stop_datapath(adapter);
|
||||
ifcvf_free_irq(adapter, vf->nr_vring);
|
||||
}
|
||||
|
||||
ifcvf_reset_vring(adapter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
return IFCVF_QUEUE_MAX;
|
||||
@ -437,6 +447,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
|
||||
.set_features = ifcvf_vdpa_set_features,
|
||||
.get_status = ifcvf_vdpa_get_status,
|
||||
.set_status = ifcvf_vdpa_set_status,
|
||||
.reset = ifcvf_vdpa_reset,
|
||||
.get_vq_num_max = ifcvf_vdpa_get_vq_num_max,
|
||||
.get_vq_state = ifcvf_vdpa_get_vq_state,
|
||||
.set_vq_state = ifcvf_vdpa_set_vq_state,
|
||||
@ -458,50 +469,19 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
|
||||
.get_vq_notification = ifcvf_get_vq_notification,
|
||||
};
|
||||
|
||||
static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
static struct virtio_device_id id_table_net[] = {
|
||||
{VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID},
|
||||
{0},
|
||||
};
|
||||
|
||||
static struct virtio_device_id id_table_blk[] = {
|
||||
{VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID},
|
||||
{0},
|
||||
};
|
||||
|
||||
static u32 get_dev_type(struct pci_dev *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct ifcvf_adapter *adapter;
|
||||
struct ifcvf_hw *vf;
|
||||
int ret, i;
|
||||
|
||||
ret = pcim_enable_device(pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to enable device\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
|
||||
IFCVF_DRIVER_NAME);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to request MMIO region\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "No usable DMA configuration\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev,
|
||||
"Failed for adding devres for freeing irq vectors\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
|
||||
dev, &ifc_vdpa_ops, NULL);
|
||||
if (IS_ERR(adapter)) {
|
||||
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
|
||||
return PTR_ERR(adapter);
|
||||
}
|
||||
|
||||
pci_set_master(pdev);
|
||||
pci_set_drvdata(pdev, adapter);
|
||||
|
||||
vf = &adapter->vf;
|
||||
u32 dev_type;
|
||||
|
||||
/* This driver drives both modern virtio devices and transitional
|
||||
* devices in modern mode.
|
||||
@ -510,11 +490,42 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
* mode will not work for vDPA, this driver will not
|
||||
* drive devices with legacy interface.
|
||||
*/
|
||||
if (pdev->device < 0x1040)
|
||||
vf->dev_type = pdev->subsystem_device;
|
||||
else
|
||||
vf->dev_type = pdev->device - 0x1040;
|
||||
|
||||
if (pdev->device < 0x1040)
|
||||
dev_type = pdev->subsystem_device;
|
||||
else
|
||||
dev_type = pdev->device - 0x1040;
|
||||
|
||||
return dev_type;
|
||||
}
|
||||
|
||||
static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
|
||||
{
|
||||
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
|
||||
struct ifcvf_adapter *adapter;
|
||||
struct pci_dev *pdev;
|
||||
struct ifcvf_hw *vf;
|
||||
struct device *dev;
|
||||
int ret, i;
|
||||
|
||||
ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
|
||||
if (ifcvf_mgmt_dev->adapter)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
pdev = ifcvf_mgmt_dev->pdev;
|
||||
dev = &pdev->dev;
|
||||
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
|
||||
dev, &ifc_vdpa_ops, name, false);
|
||||
if (IS_ERR(adapter)) {
|
||||
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
|
||||
return PTR_ERR(adapter);
|
||||
}
|
||||
|
||||
ifcvf_mgmt_dev->adapter = adapter;
|
||||
pci_set_drvdata(pdev, ifcvf_mgmt_dev);
|
||||
|
||||
vf = &adapter->vf;
|
||||
vf->dev_type = get_dev_type(pdev);
|
||||
vf->base = pcim_iomap_table(pdev);
|
||||
|
||||
adapter->pdev = pdev;
|
||||
@ -526,14 +537,15 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
|
||||
for (i = 0; i < vf->nr_vring; i++)
|
||||
vf->vring[i].irq = -EINVAL;
|
||||
|
||||
vf->hw_features = ifcvf_get_hw_features(vf);
|
||||
|
||||
ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
|
||||
adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
|
||||
ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
|
||||
IFCVF_ERR(pdev, "Failed to register to vDPA bus");
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -544,11 +556,100 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
|
||||
{
|
||||
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
|
||||
|
||||
ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
|
||||
_vdpa_unregister_device(dev);
|
||||
ifcvf_mgmt_dev->adapter = NULL;
|
||||
}
|
||||
|
||||
static const struct vdpa_mgmtdev_ops ifcvf_vdpa_mgmt_dev_ops = {
|
||||
.dev_add = ifcvf_vdpa_dev_add,
|
||||
.dev_del = ifcvf_vdpa_dev_del
|
||||
};
|
||||
|
||||
static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
{
|
||||
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
|
||||
struct device *dev = &pdev->dev;
|
||||
u32 dev_type;
|
||||
int ret;
|
||||
|
||||
ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
|
||||
if (!ifcvf_mgmt_dev) {
|
||||
IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dev_type = get_dev_type(pdev);
|
||||
switch (dev_type) {
|
||||
case VIRTIO_ID_NET:
|
||||
ifcvf_mgmt_dev->mdev.id_table = id_table_net;
|
||||
break;
|
||||
case VIRTIO_ID_BLOCK:
|
||||
ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
|
||||
break;
|
||||
default:
|
||||
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
|
||||
ifcvf_mgmt_dev->mdev.device = dev;
|
||||
ifcvf_mgmt_dev->pdev = pdev;
|
||||
|
||||
ret = pcim_enable_device(pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to enable device\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
|
||||
IFCVF_DRIVER_NAME);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to request MMIO region\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "No usable DMA configuration\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev,
|
||||
"Failed for adding devres for freeing irq vectors\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
pci_set_master(pdev);
|
||||
|
||||
ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev,
|
||||
"Failed to initialize the management interfaces\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
kfree(ifcvf_mgmt_dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ifcvf_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
|
||||
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
|
||||
|
||||
vdpa_unregister_device(&adapter->vdpa);
|
||||
ifcvf_mgmt_dev = pci_get_drvdata(pdev);
|
||||
vdpa_mgmtdev_unregister(&ifcvf_mgmt_dev->mdev);
|
||||
kfree(ifcvf_mgmt_dev);
|
||||
}
|
||||
|
||||
static struct pci_device_id ifcvf_pci_ids[] = {
|
||||
|
@ -5,7 +5,7 @@
|
||||
#define __MLX5_VDPA_H__
|
||||
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/vringh.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <linux/mlx5/driver.h>
|
||||
|
||||
@ -48,6 +48,26 @@ struct mlx5_vdpa_resources {
|
||||
bool valid;
|
||||
};
|
||||
|
||||
struct mlx5_control_vq {
|
||||
struct vhost_iotlb *iotlb;
|
||||
/* spinlock to synchronize iommu table */
|
||||
spinlock_t iommu_lock;
|
||||
struct vringh vring;
|
||||
bool ready;
|
||||
u64 desc_addr;
|
||||
u64 device_addr;
|
||||
u64 driver_addr;
|
||||
struct vdpa_callback event_cb;
|
||||
struct vringh_kiov riov;
|
||||
struct vringh_kiov wiov;
|
||||
unsigned short head;
|
||||
};
|
||||
|
||||
struct mlx5_ctrl_wq_ent {
|
||||
struct work_struct work;
|
||||
struct mlx5_vdpa_dev *mvdev;
|
||||
};
|
||||
|
||||
struct mlx5_vdpa_dev {
|
||||
struct vdpa_device vdev;
|
||||
struct mlx5_core_dev *mdev;
|
||||
@ -57,9 +77,12 @@ struct mlx5_vdpa_dev {
|
||||
u64 actual_features;
|
||||
u8 status;
|
||||
u32 max_vqs;
|
||||
u16 max_idx;
|
||||
u32 generation;
|
||||
|
||||
struct mlx5_vdpa_mr mr;
|
||||
struct mlx5_control_vq cvq;
|
||||
struct workqueue_struct *wq;
|
||||
};
|
||||
|
||||
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
|
||||
@ -68,6 +91,7 @@ int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
|
||||
int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
|
||||
void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
|
||||
int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
|
||||
int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn);
|
||||
void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
|
||||
int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
|
||||
void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
|
||||
|
||||
#include <linux/vhost_types.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <linux/gcd.h>
|
||||
#include <linux/string.h>
|
||||
@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
|
||||
mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
|
||||
}
|
||||
|
||||
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
|
||||
{
|
||||
struct mlx5_vdpa_mr *mr = &mvdev->mr;
|
||||
struct vhost_iotlb_map *map;
|
||||
u64 start = 0, last = ULLONG_MAX;
|
||||
int err;
|
||||
|
||||
if (mr->initialized)
|
||||
return 0;
|
||||
if (!src) {
|
||||
err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (iotlb)
|
||||
err = create_user_mr(mvdev, iotlb);
|
||||
else
|
||||
err = create_dma_mr(mvdev, mr);
|
||||
|
||||
if (!err)
|
||||
mr->initialized = true;
|
||||
|
||||
return err;
|
||||
for (map = vhost_iotlb_itree_first(src, start, last); map;
|
||||
map = vhost_iotlb_itree_next(map, start, last)) {
|
||||
err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
|
||||
map->addr, map->perm);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_lock(&mvdev->mr.mkey_mtx);
|
||||
err = _mlx5_vdpa_create_mr(mvdev, iotlb);
|
||||
mutex_unlock(&mvdev->mr.mkey_mtx);
|
||||
return err;
|
||||
vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
|
||||
}
|
||||
|
||||
static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
|
||||
@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
|
||||
if (!mr->initialized)
|
||||
goto out;
|
||||
|
||||
prune_iotlb(mvdev);
|
||||
if (mr->user_mr)
|
||||
destroy_user_mr(mvdev, mr);
|
||||
else
|
||||
@ -512,6 +511,48 @@ out:
|
||||
mutex_unlock(&mr->mkey_mtx);
|
||||
}
|
||||
|
||||
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct mlx5_vdpa_mr *mr = &mvdev->mr;
|
||||
int err;
|
||||
|
||||
if (mr->initialized)
|
||||
return 0;
|
||||
|
||||
if (iotlb)
|
||||
err = create_user_mr(mvdev, iotlb);
|
||||
else
|
||||
err = create_dma_mr(mvdev, mr);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = dup_iotlb(mvdev, iotlb);
|
||||
if (err)
|
||||
goto out_err;
|
||||
|
||||
mr->initialized = true;
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
if (iotlb)
|
||||
destroy_user_mr(mvdev, mr);
|
||||
else
|
||||
destroy_dma_mr(mvdev, mr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_lock(&mvdev->mr.mkey_mtx);
|
||||
err = _mlx5_vdpa_create_mr(mvdev, iotlb);
|
||||
mutex_unlock(&mvdev->mr.mkey_mtx);
|
||||
return err;
|
||||
}
|
||||
|
||||
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
|
||||
bool *change_map)
|
||||
{
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
|
||||
|
||||
#include <linux/iova.h>
|
||||
#include <linux/mlx5/driver.h>
|
||||
#include "mlx5_vdpa.h"
|
||||
|
||||
@ -128,6 +129,16 @@ int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Issue a MODIFY_RQT command for the RQ table @rqtn.
 *
 * @in:    caller-built modify_rqt_in mailbox; uid, rqtn and opcode are filled
 *         in here.
 * @inlen: size of @in in bytes.
 *
 * Returns the result of mlx5_cmd_exec().
 */
int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn)
{
	/* Output mailbox must follow the layout of the command being issued. */
	u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {};

	MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid);
	MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
	MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
	return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
}
|
||||
|
||||
void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
|
||||
{
|
||||
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
|
||||
@ -221,6 +232,22 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
|
||||
return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
|
||||
}
|
||||
|
||||
static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
|
||||
if (!mvdev->cvq.iotlb)
|
||||
return -ENOMEM;
|
||||
|
||||
vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
vhost_iotlb_free(mvdev->cvq.iotlb);
|
||||
}
|
||||
|
||||
int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
|
||||
@ -260,10 +287,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
|
||||
err = -ENOMEM;
|
||||
goto err_key;
|
||||
}
|
||||
|
||||
err = init_ctrl_vq(mvdev);
|
||||
if (err)
|
||||
goto err_ctrl;
|
||||
|
||||
res->valid = true;
|
||||
|
||||
return 0;
|
||||
|
||||
err_ctrl:
|
||||
iounmap(res->kick_addr);
|
||||
err_key:
|
||||
dealloc_pd(mvdev, res->pdn, res->uid);
|
||||
err_pd:
|
||||
@ -282,6 +316,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
|
||||
if (!res->valid)
|
||||
return;
|
||||
|
||||
cleanup_ctrl_vq(mvdev);
|
||||
iounmap(res->kick_addr);
|
||||
res->kick_addr = NULL;
|
||||
dealloc_pd(mvdev, res->pdn, res->uid);
|
||||
|
@ -45,6 +45,8 @@ MODULE_LICENSE("Dual BSD/GPL");
|
||||
(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
|
||||
VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
|
||||
|
||||
#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
|
||||
|
||||
struct mlx5_vdpa_net_resources {
|
||||
u32 tisn;
|
||||
u32 tdn;
|
||||
@ -90,7 +92,6 @@ struct mlx5_vq_restore_info {
|
||||
u16 avail_index;
|
||||
u16 used_index;
|
||||
bool ready;
|
||||
struct vdpa_callback cb;
|
||||
bool restore;
|
||||
};
|
||||
|
||||
@ -100,7 +101,6 @@ struct mlx5_vdpa_virtqueue {
|
||||
u64 device_addr;
|
||||
u64 driver_addr;
|
||||
u32 num_ent;
|
||||
struct vdpa_callback event_cb;
|
||||
|
||||
/* Resources for implementing the notification channel from the device
|
||||
* to the driver. fwqp is the firmware end of an RC connection; the
|
||||
@ -135,11 +135,20 @@ struct mlx5_vdpa_virtqueue {
|
||||
*/
|
||||
#define MLX5_MAX_SUPPORTED_VQS 16
|
||||
|
||||
/* Return true when @idx does not exceed the highest virtqueue index in use. */
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return !unlikely(idx > mvdev->max_idx);
}
|
||||
|
||||
struct mlx5_vdpa_net {
|
||||
struct mlx5_vdpa_dev mvdev;
|
||||
struct mlx5_vdpa_net_resources res;
|
||||
struct virtio_net_config config;
|
||||
struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
|
||||
struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
|
||||
|
||||
/* Serialize vq resources creation and destruction. This is required
|
||||
* since memory map might change and we need to destroy and create
|
||||
@ -151,15 +160,18 @@ struct mlx5_vdpa_net {
|
||||
struct mlx5_flow_handle *rx_rule;
|
||||
bool setup;
|
||||
u16 mtu;
|
||||
u32 cur_num_vqs;
|
||||
};
|
||||
|
||||
static void free_resources(struct mlx5_vdpa_net *ndev);
|
||||
static void init_mvqs(struct mlx5_vdpa_net *ndev);
|
||||
static int setup_driver(struct mlx5_vdpa_net *ndev);
|
||||
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
|
||||
static void teardown_driver(struct mlx5_vdpa_net *ndev);
|
||||
|
||||
static bool mlx5_vdpa_debug;
|
||||
|
||||
#define MLX5_CVQ_MAX_ENT 16
|
||||
|
||||
#define MLX5_LOG_VIO_FLAG(_feature) \
|
||||
do { \
|
||||
if (features & BIT_ULL(_feature)) \
|
||||
@ -172,11 +184,41 @@ static bool mlx5_vdpa_debug;
|
||||
mlx5_vdpa_info(mvdev, "%s\n", #_status); \
|
||||
} while (0)
|
||||
|
||||
/* TODO: cross-endian support */
|
||||
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
return virtio_legacy_is_little_endian() ||
|
||||
(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
|
||||
}
|
||||
|
||||
/* Convert a 16-bit virtio value to CPU byte order for this device. */
static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	bool little_endian = mlx5_vdpa_is_little_endian(mvdev);

	return __virtio16_to_cpu(little_endian, val);
}
|
||||
|
||||
/* Convert a 16-bit CPU value to the virtio byte order of this device. */
static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	bool little_endian = mlx5_vdpa_is_little_endian(mvdev);

	return __cpu_to_virtio16(little_endian, val);
}
|
||||
|
||||
/* Number of queue pairs for @max_vqs virtqueues (one RX and one TX per pair). */
static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
	int pairs = max_vqs / 2;

	return pairs;
}
|
||||
|
||||
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
|
||||
return 2;
|
||||
|
||||
return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
|
||||
}
|
||||
|
||||
/* Return true when @idx is the control virtqueue index. */
static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	u16 cvq_idx = ctrl_vq_idx(mvdev);

	return cvq_idx == idx;
}
|
||||
|
||||
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
|
||||
{
|
||||
if (status & ~VALID_STATUS_MASK)
|
||||
@ -481,6 +523,10 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
|
||||
|
||||
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
|
||||
{
|
||||
struct mlx5_vdpa_net *ndev = mvq->ndev;
|
||||
struct vdpa_callback *event_cb;
|
||||
|
||||
event_cb = &ndev->event_cbs[mvq->index];
|
||||
mlx5_cq_set_ci(&mvq->cq.mcq);
|
||||
|
||||
/* make sure CQ consumer update is visible to the hardware before updating
|
||||
@ -488,8 +534,8 @@ static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int nu
|
||||
*/
|
||||
dma_wmb();
|
||||
rx_post(&mvq->vqqp, num);
|
||||
if (mvq->event_cb.callback)
|
||||
mvq->event_cb.callback(mvq->event_cb.private);
|
||||
if (event_cb->callback)
|
||||
event_cb->callback(event_cb->private);
|
||||
}
|
||||
|
||||
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
|
||||
@ -1100,10 +1146,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
|
||||
if (!mvq->num_ent)
|
||||
return 0;
|
||||
|
||||
if (mvq->initialized) {
|
||||
mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (mvq->initialized)
|
||||
return 0;
|
||||
|
||||
err = cq_create(ndev, idx, mvq->num_ent);
|
||||
if (err)
|
||||
@ -1190,19 +1234,20 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
|
||||
|
||||
static int create_rqt(struct mlx5_vdpa_net *ndev)
|
||||
{
|
||||
int log_max_rqt;
|
||||
__be32 *list;
|
||||
int max_rqt;
|
||||
void *rqtc;
|
||||
int inlen;
|
||||
void *in;
|
||||
int i, j;
|
||||
int err;
|
||||
|
||||
log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
|
||||
if (log_max_rqt < 1)
|
||||
max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
|
||||
1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
|
||||
if (max_rqt < 1)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
|
||||
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
|
||||
in = kzalloc(inlen, GFP_KERNEL);
|
||||
if (!in)
|
||||
return -ENOMEM;
|
||||
@ -1211,10 +1256,9 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
|
||||
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
|
||||
|
||||
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
|
||||
MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
|
||||
MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
|
||||
MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
|
||||
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
|
||||
for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
|
||||
for (i = 0, j = 0; j < max_rqt; j++) {
|
||||
if (!ndev->vqs[j].initialized)
|
||||
continue;
|
||||
|
||||
@ -1223,6 +1267,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
|
||||
i++;
|
||||
}
|
||||
}
|
||||
MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
|
||||
|
||||
err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
|
||||
kfree(in);
|
||||
@ -1232,6 +1277,52 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
|
||||
|
||||
static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
|
||||
{
|
||||
__be32 *list;
|
||||
int max_rqt;
|
||||
void *rqtc;
|
||||
int inlen;
|
||||
void *in;
|
||||
int i, j;
|
||||
int err;
|
||||
|
||||
max_rqt = min_t(int, ndev->cur_num_vqs / 2,
|
||||
1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
|
||||
if (max_rqt < 1)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
|
||||
in = kzalloc(inlen, GFP_KERNEL);
|
||||
if (!in)
|
||||
return -ENOMEM;
|
||||
|
||||
MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
|
||||
MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
|
||||
rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
|
||||
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
|
||||
|
||||
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
|
||||
for (i = 0, j = 0; j < num; j++) {
|
||||
if (!ndev->vqs[j].initialized)
|
||||
continue;
|
||||
|
||||
if (!vq_is_tx(ndev->vqs[j].index)) {
|
||||
list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
|
||||
err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
|
||||
kfree(in);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void destroy_rqt(struct mlx5_vdpa_net *ndev)
|
||||
{
|
||||
mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
|
||||
@ -1345,12 +1436,206 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
|
||||
ndev->rx_rule = NULL;
|
||||
}
|
||||
|
||||
/*
 * Handle a VIRTIO_NET_CTRL_MAC class command read from the control virtqueue.
 *
 * Only VIRTIO_NET_CTRL_MAC_ADDR_SET is supported: the new MAC is pulled from
 * the readable iovec, the current MAC (if non-zero) is removed from the MPFS
 * table of the physical function and the new one is inserted. If inserting
 * the new MAC fails after the old one was removed, re-inserting the old MAC
 * is attempted so the MPFS table keeps matching config.mac.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	bool old_mac_removed = false;
	size_t read;
	u8 mac[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		/* Nothing to do when the requested MAC is already configured. */
		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
			old_mac_removed = true;
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			/* config.mac is unchanged; try to put it back into MPFS. */
			if (old_mac_removed &&
			    mlx5_mpfs_add_mac(pfmdev, ndev->config.mac))
				mlx5_vdpa_warn(mvdev, "failed to restore old MAC %pM in MPFS table\n",
					       ndev->config.mac);
			break;
		}

		memcpy(ndev->config.mac, mac, ETH_ALEN);
		status = VIRTIO_NET_OK;
		break;

	default:
		break;
	}

	return status;
}
|
||||
|
||||
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
|
||||
{
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
int cur_qps = ndev->cur_num_vqs / 2;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
if (cur_qps > newqps) {
|
||||
err = modify_rqt(ndev, 2 * newqps);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
|
||||
teardown_vq(ndev, &ndev->vqs[i]);
|
||||
|
||||
ndev->cur_num_vqs = 2 * newqps;
|
||||
} else {
|
||||
ndev->cur_num_vqs = 2 * newqps;
|
||||
for (i = cur_qps * 2; i < 2 * newqps; i++) {
|
||||
err = setup_vq(ndev, &ndev->vqs[i]);
|
||||
if (err)
|
||||
goto clean_added;
|
||||
}
|
||||
err = modify_rqt(ndev, 2 * newqps);
|
||||
if (err)
|
||||
goto clean_added;
|
||||
}
|
||||
return 0;
|
||||
|
||||
clean_added:
|
||||
for (--i; i >= cur_qps; --i)
|
||||
teardown_vq(ndev, &ndev->vqs[i]);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Handle a VIRTIO_NET_CTRL_MQ class command read from the control virtqueue.
 *
 * Only VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is supported. The requested number of
 * queue pairs comes from the driver and is validated before use: it must be
 * at least VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, must not exceed the number of
 * pairs the device supports, and must be a power of two.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	struct virtio_net_ctrl_mq mq;
	size_t read;
	u16 newqps;

	switch (cmd) {
	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
		if (read != sizeof(mq))
			break;

		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
		/* Reject 0 and anything that would index past ndev->vqs[]. */
		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
			break;

		if (ndev->cur_num_vqs == 2 * newqps) {
			status = VIRTIO_NET_OK;
			break;
		}

		/* Only power-of-two pair counts are accepted. */
		if (newqps & (newqps - 1))
			break;

		if (!change_num_qps(mvdev, newqps))
			status = VIRTIO_NET_OK;

		break;
	default:
		break;
	}

	return status;
}
|
||||
|
||||
static void mlx5_cvq_kick_handler(struct work_struct *work)
|
||||
{
|
||||
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
|
||||
struct virtio_net_ctrl_hdr ctrl;
|
||||
struct mlx5_ctrl_wq_ent *wqent;
|
||||
struct mlx5_vdpa_dev *mvdev;
|
||||
struct mlx5_control_vq *cvq;
|
||||
struct mlx5_vdpa_net *ndev;
|
||||
size_t read, write;
|
||||
int err;
|
||||
|
||||
wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
|
||||
mvdev = wqent->mvdev;
|
||||
ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
cvq = &mvdev->cvq;
|
||||
if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
|
||||
goto out;
|
||||
|
||||
if (!cvq->ready)
|
||||
goto out;
|
||||
|
||||
while (true) {
|
||||
err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
|
||||
GFP_ATOMIC);
|
||||
if (err <= 0)
|
||||
break;
|
||||
|
||||
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
|
||||
if (read != sizeof(ctrl))
|
||||
break;
|
||||
|
||||
switch (ctrl.class) {
|
||||
case VIRTIO_NET_CTRL_MAC:
|
||||
status = handle_ctrl_mac(mvdev, ctrl.cmd);
|
||||
break;
|
||||
case VIRTIO_NET_CTRL_MQ:
|
||||
status = handle_ctrl_mq(mvdev, ctrl.cmd);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Make sure data is written before advancing index */
|
||||
smp_wmb();
|
||||
|
||||
write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
|
||||
vringh_complete_iotlb(&cvq->vring, cvq->head, write);
|
||||
vringh_kiov_cleanup(&cvq->riov);
|
||||
vringh_kiov_cleanup(&cvq->wiov);
|
||||
|
||||
if (vringh_need_notify_iotlb(&cvq->vring))
|
||||
vringh_notify(&cvq->vring);
|
||||
}
|
||||
out:
|
||||
kfree(wqent);
|
||||
}
|
||||
|
||||
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
|
||||
struct mlx5_vdpa_virtqueue *mvq;
|
||||
struct mlx5_ctrl_wq_ent *wqent;
|
||||
|
||||
if (!is_index_valid(mvdev, idx))
|
||||
return;
|
||||
|
||||
if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
|
||||
if (!mvdev->cvq.ready)
|
||||
return;
|
||||
|
||||
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
|
||||
if (!wqent)
|
||||
return;
|
||||
|
||||
wqent->mvdev = mvdev;
|
||||
INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
|
||||
queue_work(mvdev->wq, &wqent->work);
|
||||
return;
|
||||
}
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
if (unlikely(!mvq->ready))
|
||||
return;
|
||||
|
||||
@ -1362,8 +1647,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
|
||||
struct mlx5_vdpa_virtqueue *mvq;
|
||||
|
||||
if (!is_index_valid(mvdev, idx))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_ctrl_vq_idx(mvdev, idx)) {
|
||||
mvdev->cvq.desc_addr = desc_area;
|
||||
mvdev->cvq.device_addr = device_area;
|
||||
mvdev->cvq.driver_addr = driver_area;
|
||||
return 0;
|
||||
}
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
mvq->desc_addr = desc_area;
|
||||
mvq->device_addr = device_area;
|
||||
mvq->driver_addr = driver_area;
|
||||
@ -1376,6 +1672,9 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq;
|
||||
|
||||
if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
|
||||
return;
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
mvq->num_ent = num;
|
||||
}
|
||||
@ -1384,17 +1683,46 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
|
||||
|
||||
vq->event_cb = *cb;
|
||||
ndev->event_cbs[idx] = *cb;
|
||||
}
|
||||
|
||||
static void mlx5_cvq_notify(struct vringh *vring)
|
||||
{
|
||||
struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
|
||||
|
||||
if (!cvq->event_cb.callback)
|
||||
return;
|
||||
|
||||
cvq->event_cb.callback(cvq->event_cb.private);
|
||||
}
|
||||
|
||||
/*
 * Record the ready state of the control virtqueue. When it becomes ready,
 * the vringh notify hook is pointed at mlx5_cvq_notify().
 */
static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
	struct mlx5_control_vq *cvq = &mvdev->cvq;

	cvq->ready = ready;
	if (ready)
		cvq->vring.notify = mlx5_cvq_notify;
}
|
||||
|
||||
static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
|
||||
struct mlx5_vdpa_virtqueue *mvq;
|
||||
|
||||
if (!is_index_valid(mvdev, idx))
|
||||
return;
|
||||
|
||||
if (is_ctrl_vq_idx(mvdev, idx)) {
|
||||
set_cvq_ready(mvdev, ready);
|
||||
return;
|
||||
}
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
if (!ready)
|
||||
suspend_vq(ndev, mvq);
|
||||
|
||||
@ -1405,9 +1733,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
|
||||
|
||||
return mvq->ready;
|
||||
if (!is_index_valid(mvdev, idx))
|
||||
return false;
|
||||
|
||||
if (is_ctrl_vq_idx(mvdev, idx))
|
||||
return mvdev->cvq.ready;
|
||||
|
||||
return ndev->vqs[idx].ready;
|
||||
}
|
||||
|
||||
static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
|
||||
@ -1415,8 +1748,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
|
||||
struct mlx5_vdpa_virtqueue *mvq;
|
||||
|
||||
if (!is_index_valid(mvdev, idx))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_ctrl_vq_idx(mvdev, idx)) {
|
||||
mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
|
||||
return 0;
|
||||
}
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
|
||||
mlx5_vdpa_warn(mvdev, "can't modify available index\n");
|
||||
return -EINVAL;
|
||||
@ -1431,10 +1773,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
|
||||
struct mlx5_vdpa_virtqueue *mvq;
|
||||
struct mlx5_virtq_attr attr;
|
||||
int err;
|
||||
|
||||
if (!is_index_valid(mvdev, idx))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_ctrl_vq_idx(mvdev, idx)) {
|
||||
state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
/* If the virtq object was destroyed, use the value saved at
|
||||
* the last minute of suspend_vq. This caters for userspace
|
||||
* that cares about emulating the index after vq is stopped.
|
||||
@ -1491,10 +1842,14 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
|
||||
u16 dev_features;
|
||||
|
||||
dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
|
||||
ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
|
||||
ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
|
||||
if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
|
||||
|
||||
print_features(mvdev, ndev->mvdev.mlx_features, false);
|
||||
return ndev->mvdev.mlx_features;
|
||||
}
|
||||
@ -1507,17 +1862,29 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
|
||||
static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
struct mlx5_control_vq *cvq = &mvdev->cvq;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
|
||||
for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
|
||||
err = setup_vq(ndev, &ndev->vqs[i]);
|
||||
if (err)
|
||||
goto err_vq;
|
||||
}
|
||||
|
||||
if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
|
||||
err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
|
||||
MLX5_CVQ_MAX_ENT, false,
|
||||
(struct vring_desc *)(uintptr_t)cvq->desc_addr,
|
||||
(struct vring_avail *)(uintptr_t)cvq->driver_addr,
|
||||
(struct vring_used *)(uintptr_t)cvq->device_addr);
|
||||
if (err)
|
||||
goto err_vq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_vq:
|
||||
@ -1541,16 +1908,22 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: cross-endian support */
|
||||
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
|
||||
static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
return virtio_legacy_is_little_endian() ||
|
||||
(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
|
||||
}
|
||||
|
||||
static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
|
||||
{
|
||||
return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
|
||||
if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
|
||||
if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
|
||||
/* MQ supported. CVQ index is right above the last data virtqueue's */
|
||||
mvdev->max_idx = mvdev->max_vqs;
|
||||
} else {
|
||||
/* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
|
||||
* CVQ gets index 2
|
||||
*/
|
||||
mvdev->max_idx = 2;
|
||||
}
|
||||
} else {
|
||||
/* Two data virtqueues only: one for rx and one for tx */
|
||||
mvdev->max_idx = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
|
||||
@ -1568,6 +1941,7 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
|
||||
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
|
||||
ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
|
||||
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
|
||||
update_cvq_info(mvdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1605,15 +1979,14 @@ static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
|
||||
static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
|
||||
{
|
||||
struct mlx5_vq_restore_info *ri = &mvq->ri;
|
||||
struct mlx5_virtq_attr attr;
|
||||
struct mlx5_virtq_attr attr = {};
|
||||
int err;
|
||||
|
||||
if (!mvq->initialized)
|
||||
return 0;
|
||||
|
||||
err = query_virtqueue(ndev, mvq, &attr);
|
||||
if (err)
|
||||
return err;
|
||||
if (mvq->initialized) {
|
||||
err = query_virtqueue(ndev, mvq, &attr);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
ri->avail_index = attr.available_index;
|
||||
ri->used_index = attr.used_index;
|
||||
@ -1622,7 +1995,6 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
|
||||
ri->desc_addr = mvq->desc_addr;
|
||||
ri->device_addr = mvq->device_addr;
|
||||
ri->driver_addr = mvq->driver_addr;
|
||||
ri->cb = mvq->event_cb;
|
||||
ri->restore = true;
|
||||
return 0;
|
||||
}
|
||||
@ -1667,12 +2039,12 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
|
||||
mvq->desc_addr = ri->desc_addr;
|
||||
mvq->device_addr = ri->device_addr;
|
||||
mvq->driver_addr = ri->driver_addr;
|
||||
mvq->event_cb = ri->cb;
|
||||
}
|
||||
}
|
||||
|
||||
static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
|
||||
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
int err;
|
||||
|
||||
suspend_vqs(ndev);
|
||||
@ -1681,58 +2053,59 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *
|
||||
goto err_mr;
|
||||
|
||||
teardown_driver(ndev);
|
||||
mlx5_vdpa_destroy_mr(&ndev->mvdev);
|
||||
err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
|
||||
mlx5_vdpa_destroy_mr(mvdev);
|
||||
err = mlx5_vdpa_create_mr(mvdev, iotlb);
|
||||
if (err)
|
||||
goto err_mr;
|
||||
|
||||
if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
|
||||
if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
|
||||
return 0;
|
||||
|
||||
restore_channels_info(ndev);
|
||||
err = setup_driver(ndev);
|
||||
err = setup_driver(mvdev);
|
||||
if (err)
|
||||
goto err_setup;
|
||||
|
||||
return 0;
|
||||
|
||||
err_setup:
|
||||
mlx5_vdpa_destroy_mr(&ndev->mvdev);
|
||||
mlx5_vdpa_destroy_mr(mvdev);
|
||||
err_mr:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int setup_driver(struct mlx5_vdpa_net *ndev)
|
||||
static int setup_driver(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
int err;
|
||||
|
||||
mutex_lock(&ndev->reslock);
|
||||
if (ndev->setup) {
|
||||
mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
|
||||
mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
|
||||
err = 0;
|
||||
goto out;
|
||||
}
|
||||
err = setup_virtqueues(ndev);
|
||||
err = setup_virtqueues(mvdev);
|
||||
if (err) {
|
||||
mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
|
||||
mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = create_rqt(ndev);
|
||||
if (err) {
|
||||
mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
|
||||
mlx5_vdpa_warn(mvdev, "create_rqt\n");
|
||||
goto err_rqt;
|
||||
}
|
||||
|
||||
err = create_tir(ndev);
|
||||
if (err) {
|
||||
mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
|
||||
mlx5_vdpa_warn(mvdev, "create_tir\n");
|
||||
goto err_tir;
|
||||
}
|
||||
|
||||
err = add_fwd_to_tir(ndev);
|
||||
if (err) {
|
||||
mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
|
||||
mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
|
||||
goto err_fwd;
|
||||
}
|
||||
ndev->setup = true;
|
||||
@ -1781,24 +2154,10 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
|
||||
int err;
|
||||
|
||||
print_status(mvdev, status, true);
|
||||
if (!status) {
|
||||
mlx5_vdpa_info(mvdev, "performing device reset\n");
|
||||
teardown_driver(ndev);
|
||||
clear_vqs_ready(ndev);
|
||||
mlx5_vdpa_destroy_mr(&ndev->mvdev);
|
||||
ndev->mvdev.status = 0;
|
||||
ndev->mvdev.mlx_features = 0;
|
||||
++mvdev->generation;
|
||||
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
|
||||
if (mlx5_vdpa_create_mr(mvdev, NULL))
|
||||
mlx5_vdpa_warn(mvdev, "create MR failed\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
|
||||
if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
|
||||
err = setup_driver(ndev);
|
||||
err = setup_driver(mvdev);
|
||||
if (err) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
|
||||
goto err_setup;
|
||||
@ -1817,6 +2176,29 @@ err_setup:
|
||||
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
|
||||
}
|
||||
|
||||
static int mlx5_vdpa_reset(struct vdpa_device *vdev)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
|
||||
print_status(mvdev, 0, true);
|
||||
mlx5_vdpa_info(mvdev, "performing device reset\n");
|
||||
teardown_driver(ndev);
|
||||
clear_vqs_ready(ndev);
|
||||
mlx5_vdpa_destroy_mr(&ndev->mvdev);
|
||||
ndev->mvdev.status = 0;
|
||||
ndev->mvdev.mlx_features = 0;
|
||||
memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
|
||||
ndev->mvdev.actual_features = 0;
|
||||
++mvdev->generation;
|
||||
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
|
||||
if (mlx5_vdpa_create_mr(mvdev, NULL))
|
||||
mlx5_vdpa_warn(mvdev, "create MR failed\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
|
||||
{
|
||||
return sizeof(struct virtio_net_config);
|
||||
@ -1848,7 +2230,6 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
|
||||
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
bool change_map;
|
||||
int err;
|
||||
|
||||
@ -1859,7 +2240,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb
|
||||
}
|
||||
|
||||
if (change_map)
|
||||
return mlx5_vdpa_change_map(ndev, iotlb);
|
||||
return mlx5_vdpa_change_map(mvdev, iotlb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1889,6 +2270,9 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
|
||||
struct mlx5_vdpa_net *ndev;
|
||||
phys_addr_t addr;
|
||||
|
||||
if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
|
||||
return ret;
|
||||
|
||||
/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
|
||||
* notification to avoid the risk of mapping pages that contain BAR of more
|
||||
* than one SF
|
||||
@ -1928,6 +2312,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
|
||||
.get_vendor_id = mlx5_vdpa_get_vendor_id,
|
||||
.get_status = mlx5_vdpa_get_status,
|
||||
.set_status = mlx5_vdpa_set_status,
|
||||
.reset = mlx5_vdpa_reset,
|
||||
.get_config_size = mlx5_vdpa_get_config_size,
|
||||
.get_config = mlx5_vdpa_get_config,
|
||||
.set_config = mlx5_vdpa_set_config,
|
||||
@ -2040,7 +2425,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
|
||||
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
|
||||
|
||||
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
|
||||
name);
|
||||
name, false);
|
||||
if (IS_ERR(ndev))
|
||||
return PTR_ERR(ndev);
|
||||
|
||||
@ -2063,8 +2448,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
|
||||
err = mlx5_mpfs_add_mac(pfmdev, config->mac);
|
||||
if (err)
|
||||
goto err_mtu;
|
||||
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
|
||||
}
|
||||
|
||||
config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
|
||||
mvdev->vdev.dma_dev = &mdev->pdev->dev;
|
||||
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
|
||||
if (err)
|
||||
@ -2080,8 +2468,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
|
||||
if (err)
|
||||
goto err_mr;
|
||||
|
||||
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
|
||||
if (!mvdev->wq) {
|
||||
err = -ENOMEM;
|
||||
goto err_res2;
|
||||
}
|
||||
|
||||
ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
|
||||
mvdev->vdev.mdev = &mgtdev->mgtdev;
|
||||
err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
|
||||
err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
|
||||
if (err)
|
||||
goto err_reg;
|
||||
|
||||
@ -2089,6 +2484,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
|
||||
return 0;
|
||||
|
||||
err_reg:
|
||||
destroy_workqueue(mvdev->wq);
|
||||
err_res2:
|
||||
free_resources(ndev);
|
||||
err_mr:
|
||||
mlx5_vdpa_destroy_mr(mvdev);
|
||||
@ -2106,7 +2503,9 @@ err_mtu:
|
||||
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
|
||||
{
|
||||
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
|
||||
|
||||
destroy_workqueue(mvdev->wq);
|
||||
_vdpa_unregister_device(dev);
|
||||
mgtdev->ndev = NULL;
|
||||
}
|
||||
|
@ -69,6 +69,7 @@ static void vdpa_release_dev(struct device *d)
|
||||
* @config: the bus operations that is supported by this device
|
||||
* @size: size of the parent structure that contains private data
|
||||
* @name: name of the vdpa device; optional.
|
||||
* @use_va: indicate whether virtual address must be used by this device
|
||||
*
|
||||
* Driver should use vdpa_alloc_device() wrapper macro instead of
|
||||
* using this directly.
|
||||
@ -78,7 +79,8 @@ static void vdpa_release_dev(struct device *d)
|
||||
*/
|
||||
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
const struct vdpa_config_ops *config,
|
||||
size_t size, const char *name)
|
||||
size_t size, const char *name,
|
||||
bool use_va)
|
||||
{
|
||||
struct vdpa_device *vdev;
|
||||
int err = -EINVAL;
|
||||
@ -89,6 +91,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
if (!!config->dma_map != !!config->dma_unmap)
|
||||
goto err;
|
||||
|
||||
/* It should only work for the device that use on-chip IOMMU */
|
||||
if (use_va && !(config->dma_map || config->set_map))
|
||||
goto err;
|
||||
|
||||
err = -ENOMEM;
|
||||
vdev = kzalloc(size, GFP_KERNEL);
|
||||
if (!vdev)
|
||||
@ -104,6 +110,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
vdev->index = err;
|
||||
vdev->config = config;
|
||||
vdev->features_valid = false;
|
||||
vdev->use_va = use_va;
|
||||
|
||||
if (name)
|
||||
err = dev_set_name(&vdev->dev, "%s", name);
|
||||
|
@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
|
||||
vq->vring.notify = NULL;
|
||||
}
|
||||
|
||||
static void vdpasim_reset(struct vdpasim *vdpasim)
|
||||
static void vdpasim_do_reset(struct vdpasim *vdpasim)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -137,7 +137,8 @@ static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
|
||||
int ret;
|
||||
|
||||
/* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
|
||||
iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true);
|
||||
iova = alloc_iova(&vdpasim->iova, size >> iova_shift(&vdpasim->iova),
|
||||
ULONG_MAX - 1, true);
|
||||
if (!iova)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
@ -250,7 +251,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
|
||||
ops = &vdpasim_config_ops;
|
||||
|
||||
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
|
||||
dev_attr->name);
|
||||
dev_attr->name, false);
|
||||
if (IS_ERR(vdpasim)) {
|
||||
ret = PTR_ERR(vdpasim);
|
||||
goto err_alloc;
|
||||
@ -459,11 +460,21 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
|
||||
|
||||
spin_lock(&vdpasim->lock);
|
||||
vdpasim->status = status;
|
||||
if (status == 0)
|
||||
vdpasim_reset(vdpasim);
|
||||
spin_unlock(&vdpasim->lock);
|
||||
}
|
||||
|
||||
static int vdpasim_reset(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
|
||||
spin_lock(&vdpasim->lock);
|
||||
vdpasim->status = 0;
|
||||
vdpasim_do_reset(vdpasim);
|
||||
spin_unlock(&vdpasim->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
@ -544,14 +555,14 @@ err:
|
||||
}
|
||||
|
||||
static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
|
||||
u64 pa, u32 perm)
|
||||
u64 pa, u32 perm, void *opaque)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
int ret;
|
||||
|
||||
spin_lock(&vdpasim->iommu_lock);
|
||||
ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa,
|
||||
perm);
|
||||
ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1,
|
||||
pa, perm, opaque);
|
||||
spin_unlock(&vdpasim->iommu_lock);
|
||||
|
||||
return ret;
|
||||
@ -607,6 +618,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
|
||||
.get_vendor_id = vdpasim_get_vendor_id,
|
||||
.get_status = vdpasim_get_status,
|
||||
.set_status = vdpasim_set_status,
|
||||
.reset = vdpasim_reset,
|
||||
.get_config_size = vdpasim_get_config_size,
|
||||
.get_config = vdpasim_get_config,
|
||||
.set_config = vdpasim_set_config,
|
||||
@ -635,6 +647,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
|
||||
.get_vendor_id = vdpasim_get_vendor_id,
|
||||
.get_status = vdpasim_get_status,
|
||||
.set_status = vdpasim_set_status,
|
||||
.reset = vdpasim_reset,
|
||||
.get_config_size = vdpasim_get_config_size,
|
||||
.get_config = vdpasim_get_config,
|
||||
.set_config = vdpasim_set_config,
|
||||
|
5
drivers/vdpa/vdpa_user/Makefile
Normal file
5
drivers/vdpa/vdpa_user/Makefile
Normal file
@ -0,0 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
vduse-y := vduse_dev.o iova_domain.o
|
||||
|
||||
obj-$(CONFIG_VDPA_USER) += vduse.o
|
545
drivers/vdpa/vdpa_user/iova_domain.c
Normal file
545
drivers/vdpa/vdpa_user/iova_domain.c
Normal file
@ -0,0 +1,545 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* MMU-based software IOTLB.
|
||||
*
|
||||
* Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Author: Xie Yongji <xieyongji@bytedance.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/vdpa.h>
|
||||
|
||||
#include "iova_domain.h"
|
||||
|
||||
/*
 * Insert the mapping [start, last] -> addr into the domain's IOTLB.
 *
 * Each entry carries a vdpa_map_file recording @file and @offset as its
 * opaque context. A reference on @file is taken for the entry and dropped
 * again here if the insertion fails.
 *
 * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
 * error reported by vhost_iotlb_add_range_ctx().
 */
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *ctx;
	int err;

	/* GFP_ATOMIC: the callers in this file hold iotlb_lock here. */
	ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
	if (!ctx)
		return -ENOMEM;

	ctx->file = get_file(file);
	ctx->offset = offset;

	err = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, ctx);
	if (!err)
		return 0;

	fput(ctx->file);
	kfree(ctx);
	return err;
}
|
||||
|
||||
/*
 * Remove every IOTLB entry that overlaps [start, last], releasing the file
 * reference and the vdpa_map_file context attached to each one.
 */
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vhost_iotlb_map *entry;
	struct vdpa_map_file *ctx;

	for (;;) {
		entry = vhost_iotlb_itree_first(domain->iotlb, start, last);
		if (!entry)
			break;

		ctx = (struct vdpa_map_file *)entry->opaque;
		fput(ctx->file);
		kfree(ctx);
		vhost_iotlb_map_free(domain->iotlb, entry);
	}
}
|
||||
|
||||
int vduse_domain_set_map(struct vduse_iova_domain *domain,
|
||||
struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct vdpa_map_file *map_file;
|
||||
struct vhost_iotlb_map *map;
|
||||
u64 start = 0ULL, last = ULLONG_MAX;
|
||||
int ret;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
vduse_iotlb_del_range(domain, start, last);
|
||||
|
||||
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
|
||||
map = vhost_iotlb_itree_next(map, start, last)) {
|
||||
map_file = (struct vdpa_map_file *)map->opaque;
|
||||
ret = vduse_iotlb_add_range(domain, map->start, map->last,
|
||||
map->addr, map->perm,
|
||||
map_file->file,
|
||||
map_file->offset);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
|
||||
return 0;
|
||||
err:
|
||||
vduse_iotlb_del_range(domain, start, last);
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void vduse_domain_clear_map(struct vduse_iova_domain *domain,
|
||||
struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct vhost_iotlb_map *map;
|
||||
u64 start = 0ULL, last = ULLONG_MAX;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
|
||||
map = vhost_iotlb_itree_next(map, start, last)) {
|
||||
vduse_iotlb_del_range(domain, map->start, map->last);
|
||||
}
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
}
|
||||
|
||||
/*
 * Associate the bounce slots covering [iova, iova + size) with the original
 * physical range starting at @paddr, one page per slot.
 *
 * A bounce page is allocated for a slot the first time it is used and is
 * kept for later reuse. Returns 0 on success or -ENOMEM if a bounce page
 * cannot be allocated.
 *
 * On failure the orig_phys of every slot already set by this call is put
 * back to INVALID_PHYS_ADDR. Without that, the caller (which only frees the
 * IOVA on error) would leave stale "mapped" slots behind, and
 * vduse_domain_free_bounce_pages() would later warn about them and skip
 * freeing their pages. Pages allocated so far stay cached in their slots.
 */
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 first = iova;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			/* GFP_ATOMIC: no sleeping allocation on this path. */
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				goto err_rollback;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;

err_rollback:
	/* Undo the slots [first, iova) that were marked before the failure. */
	while (first < iova) {
		map = &domain->bounce_maps[first >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		first += PAGE_SIZE;
	}
	return -ENOMEM;
}
|
||||
|
||||
/*
 * Mark the bounce slots covering [iova, iova + size) as unmapped by
 * resetting their orig_phys. The bounce pages themselves are kept.
 */
static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	u64 end = iova + size - 1;
	u64 cur;

	for (cur = iova; cur <= end; cur += PAGE_SIZE)
		domain->bounce_maps[cur >> PAGE_SHIFT].orig_phys =
			INVALID_PHYS_ADDR;
}
|
||||
|
||||
/*
 * Copy @size bytes between the original physical memory at @orig and the
 * buffer at @addr, page by page through a temporary atomic kmap.
 *
 * DMA_TO_DEVICE copies from the original memory into @addr; any other
 * direction copies from @addr into the original memory.
 */
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int off = offset_in_page(orig);
	unsigned int chunk;
	char *kaddr;

	while (size) {
		/* Never cross a page boundary of the original memory. */
		chunk = min_t(size_t, PAGE_SIZE - off, size);

		kaddr = kmap_atomic(pfn_to_page(pfn));
		if (dir == DMA_TO_DEVICE)
			memcpy(addr, kaddr + off, chunk);
		else
			memcpy(kaddr + off, addr, chunk);
		kunmap_atomic(kaddr);

		addr += chunk;
		size -= chunk;
		/* Only the first page can start at a non-zero offset. */
		off = 0;
		pfn++;
	}
}
|
||||
|
||||
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
|
||||
dma_addr_t iova, size_t size,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct vduse_bounce_map *map;
|
||||
unsigned int offset;
|
||||
void *addr;
|
||||
size_t sz;
|
||||
|
||||
if (iova >= domain->bounce_size)
|
||||
return;
|
||||
|
||||
while (size) {
|
||||
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
|
||||
offset = offset_in_page(iova);
|
||||
sz = min_t(size_t, PAGE_SIZE - offset, size);
|
||||
|
||||
if (WARN_ON(!map->bounce_page ||
|
||||
map->orig_phys == INVALID_PHYS_ADDR))
|
||||
return;
|
||||
|
||||
addr = page_address(map->bounce_page) + offset;
|
||||
do_bounce(map->orig_phys + offset, addr, sz, dir);
|
||||
size -= sz;
|
||||
iova += sz;
|
||||
}
|
||||
}
|
||||
|
||||
static struct page *
|
||||
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
|
||||
{
|
||||
u64 start = iova & PAGE_MASK;
|
||||
u64 last = start + PAGE_SIZE - 1;
|
||||
struct vhost_iotlb_map *map;
|
||||
struct page *page = NULL;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
map = vhost_iotlb_itree_first(domain->iotlb, start, last);
|
||||
if (!map)
|
||||
goto out;
|
||||
|
||||
page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
|
||||
get_page(page);
|
||||
out:
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
static struct page *
|
||||
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
|
||||
{
|
||||
struct vduse_bounce_map *map;
|
||||
struct page *page = NULL;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
|
||||
if (!map->bounce_page)
|
||||
goto out;
|
||||
|
||||
page = map->bounce_page;
|
||||
get_page(page);
|
||||
out:
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
static void
|
||||
vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
|
||||
{
|
||||
struct vduse_bounce_map *map;
|
||||
unsigned long pfn, bounce_pfns;
|
||||
|
||||
bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
|
||||
|
||||
for (pfn = 0; pfn < bounce_pfns; pfn++) {
|
||||
map = &domain->bounce_maps[pfn];
|
||||
if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
|
||||
continue;
|
||||
|
||||
if (!map->bounce_page)
|
||||
continue;
|
||||
|
||||
__free_page(map->bounce_page);
|
||||
map->bounce_page = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
|
||||
{
|
||||
if (!domain->bounce_map)
|
||||
return;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
if (!domain->bounce_map)
|
||||
goto unlock;
|
||||
|
||||
vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
|
||||
domain->bounce_map = 0;
|
||||
unlock:
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
}
|
||||
|
||||
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (domain->bounce_map)
|
||||
return 0;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
if (domain->bounce_map)
|
||||
goto unlock;
|
||||
|
||||
ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
|
||||
0, VHOST_MAP_RW, domain->file, 0);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
domain->bounce_map = 1;
|
||||
unlock:
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static dma_addr_t
|
||||
vduse_domain_alloc_iova(struct iova_domain *iovad,
|
||||
unsigned long size, unsigned long limit)
|
||||
{
|
||||
unsigned long shift = iova_shift(iovad);
|
||||
unsigned long iova_len = iova_align(iovad, size) >> shift;
|
||||
unsigned long iova_pfn;
|
||||
|
||||
/*
|
||||
* Freeing non-power-of-two-sized allocations back into the IOVA caches
|
||||
* will come back to bite us badly, so we have to waste a bit of space
|
||||
* rounding up anything cacheable to make sure that can't happen. The
|
||||
* order of the unadjusted size will still match upon freeing.
|
||||
*/
|
||||
if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
|
||||
iova_len = roundup_pow_of_two(iova_len);
|
||||
iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
|
||||
|
||||
return iova_pfn << shift;
|
||||
}
|
||||
|
||||
static void vduse_domain_free_iova(struct iova_domain *iovad,
|
||||
dma_addr_t iova, size_t size)
|
||||
{
|
||||
unsigned long shift = iova_shift(iovad);
|
||||
unsigned long iova_len = iova_align(iovad, size) >> shift;
|
||||
|
||||
free_iova_fast(iovad, iova >> shift, iova_len);
|
||||
}
|
||||
|
||||
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
|
||||
struct page *page, unsigned long offset,
|
||||
size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
{
|
||||
struct iova_domain *iovad = &domain->stream_iovad;
|
||||
unsigned long limit = domain->bounce_size - 1;
|
||||
phys_addr_t pa = page_to_phys(page) + offset;
|
||||
dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
|
||||
|
||||
if (!iova)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
if (vduse_domain_init_bounce_map(domain))
|
||||
goto err;
|
||||
|
||||
if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
|
||||
goto err;
|
||||
|
||||
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
|
||||
vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
|
||||
|
||||
return iova;
|
||||
err:
|
||||
vduse_domain_free_iova(iovad, iova, size);
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
|
||||
dma_addr_t dma_addr, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct iova_domain *iovad = &domain->stream_iovad;
|
||||
|
||||
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
|
||||
vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
|
||||
|
||||
vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
|
||||
vduse_domain_free_iova(iovad, dma_addr, size);
|
||||
}
|
||||
|
||||
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
size_t size, dma_addr_t *dma_addr,
|
||||
gfp_t flag, unsigned long attrs)
|
||||
{
|
||||
struct iova_domain *iovad = &domain->consistent_iovad;
|
||||
unsigned long limit = domain->iova_limit;
|
||||
dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
|
||||
void *orig = alloc_pages_exact(size, flag);
|
||||
|
||||
if (!iova || !orig)
|
||||
goto err;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
|
||||
virt_to_phys(orig), VHOST_MAP_RW,
|
||||
domain->file, (u64)iova)) {
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
goto err;
|
||||
}
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
|
||||
*dma_addr = iova;
|
||||
|
||||
return orig;
|
||||
err:
|
||||
*dma_addr = DMA_MAPPING_ERROR;
|
||||
if (orig)
|
||||
free_pages_exact(orig, size);
|
||||
if (iova)
|
||||
vduse_domain_free_iova(iovad, iova, size);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Release a coherent allocation made by vduse_domain_alloc_coherent().
 *
 * The IOTLB entry overlapping [dma_addr, dma_addr + size - 1] is removed
 * together with its file reference and context, then the IOVA and the
 * pages (located through the physical address stored in the entry) are
 * freed. Warns and returns without freeing anything if no entry is found.
 * @vaddr and @attrs are not used.
 */
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *entry;
	struct vdpa_map_file *ctx;
	phys_addr_t phys;

	spin_lock(&domain->iotlb_lock);
	entry = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
					(u64)dma_addr + size - 1);
	if (WARN_ON(!entry)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}

	/* Remember the backing address before the entry goes away. */
	phys = entry->addr;
	ctx = (struct vdpa_map_file *)entry->opaque;
	fput(ctx->file);
	kfree(ctx);
	vhost_iotlb_map_free(domain->iotlb, entry);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(phys), size);
}
|
||||
|
||||
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
|
||||
unsigned long iova = vmf->pgoff << PAGE_SHIFT;
|
||||
struct page *page;
|
||||
|
||||
if (!domain)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
if (iova < domain->bounce_size)
|
||||
page = vduse_domain_get_bounce_page(domain, iova);
|
||||
else
|
||||
page = vduse_domain_get_coherent_page(domain, iova);
|
||||
|
||||
if (!page)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
vmf->page = page;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct vduse_domain_mmap_ops = {
|
||||
.fault = vduse_domain_mmap_fault,
|
||||
};
|
||||
|
||||
static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct vduse_iova_domain *domain = file->private_data;
|
||||
|
||||
vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
|
||||
vma->vm_private_data = domain;
|
||||
vma->vm_ops = &vduse_domain_mmap_ops;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vduse_domain_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct vduse_iova_domain *domain = file->private_data;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
|
||||
vduse_domain_free_bounce_pages(domain);
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
put_iova_domain(&domain->stream_iovad);
|
||||
put_iova_domain(&domain->consistent_iovad);
|
||||
vhost_iotlb_free(domain->iotlb);
|
||||
vfree(domain->bounce_maps);
|
||||
kfree(domain);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations vduse_domain_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.mmap = vduse_domain_mmap,
|
||||
.release = vduse_domain_release,
|
||||
};
|
||||
|
||||
void vduse_domain_destroy(struct vduse_iova_domain *domain)
|
||||
{
|
||||
fput(domain->file);
|
||||
}
|
||||
|
||||
struct vduse_iova_domain *
|
||||
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
|
||||
{
|
||||
struct vduse_iova_domain *domain;
|
||||
struct file *file;
|
||||
struct vduse_bounce_map *map;
|
||||
unsigned long pfn, bounce_pfns;
|
||||
|
||||
bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
|
||||
if (iova_limit <= bounce_size)
|
||||
return NULL;
|
||||
|
||||
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
|
||||
if (!domain)
|
||||
return NULL;
|
||||
|
||||
domain->iotlb = vhost_iotlb_alloc(0, 0);
|
||||
if (!domain->iotlb)
|
||||
goto err_iotlb;
|
||||
|
||||
domain->iova_limit = iova_limit;
|
||||
domain->bounce_size = PAGE_ALIGN(bounce_size);
|
||||
domain->bounce_maps = vzalloc(bounce_pfns *
|
||||
sizeof(struct vduse_bounce_map));
|
||||
if (!domain->bounce_maps)
|
||||
goto err_map;
|
||||
|
||||
for (pfn = 0; pfn < bounce_pfns; pfn++) {
|
||||
map = &domain->bounce_maps[pfn];
|
||||
map->orig_phys = INVALID_PHYS_ADDR;
|
||||
}
|
||||
file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
|
||||
domain, O_RDWR);
|
||||
if (IS_ERR(file))
|
||||
goto err_file;
|
||||
|
||||
domain->file = file;
|
||||
spin_lock_init(&domain->iotlb_lock);
|
||||
init_iova_domain(&domain->stream_iovad,
|
||||
PAGE_SIZE, IOVA_START_PFN);
|
||||
init_iova_domain(&domain->consistent_iovad,
|
||||
PAGE_SIZE, bounce_pfns);
|
||||
|
||||
return domain;
|
||||
err_file:
|
||||
vfree(domain->bounce_maps);
|
||||
err_map:
|
||||
vhost_iotlb_free(domain->iotlb);
|
||||
err_iotlb:
|
||||
kfree(domain);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Take a reference on the IOVA cache; returns iova_cache_get()'s result. */
int vduse_domain_init(void)
{
	int ret = iova_cache_get();

	return ret;
}
|
||||
|
||||
/* Drop the IOVA cache reference taken by vduse_domain_init(). */
void vduse_domain_exit(void)
{
	iova_cache_put();
}
|
73
drivers/vdpa/vdpa_user/iova_domain.h
Normal file
73
drivers/vdpa/vdpa_user/iova_domain.h
Normal file
@ -0,0 +1,73 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* MMU-based software IOTLB.
|
||||
*
|
||||
* Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Author: Xie Yongji <xieyongji@bytedance.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _VDUSE_IOVA_DOMAIN_H
|
||||
#define _VDUSE_IOVA_DOMAIN_H
|
||||
|
||||
#include <linux/iova.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
|
||||
#define IOVA_START_PFN 1
|
||||
|
||||
#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
|
||||
|
||||
struct vduse_bounce_map {
|
||||
struct page *bounce_page;
|
||||
u64 orig_phys;
|
||||
};
|
||||
|
||||
struct vduse_iova_domain {
|
||||
struct iova_domain stream_iovad;
|
||||
struct iova_domain consistent_iovad;
|
||||
struct vduse_bounce_map *bounce_maps;
|
||||
size_t bounce_size;
|
||||
unsigned long iova_limit;
|
||||
int bounce_map;
|
||||
struct vhost_iotlb *iotlb;
|
||||
spinlock_t iotlb_lock;
|
||||
struct file *file;
|
||||
};
|
||||
|
||||
int vduse_domain_set_map(struct vduse_iova_domain *domain,
|
||||
struct vhost_iotlb *iotlb);
|
||||
|
||||
void vduse_domain_clear_map(struct vduse_iova_domain *domain,
|
||||
struct vhost_iotlb *iotlb);
|
||||
|
||||
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
|
||||
struct page *page, unsigned long offset,
|
||||
size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs);
|
||||
|
||||
void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
|
||||
dma_addr_t dma_addr, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs);
|
||||
|
||||
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
size_t size, dma_addr_t *dma_addr,
|
||||
gfp_t flag, unsigned long attrs);
|
||||
|
||||
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
|
||||
void *vaddr, dma_addr_t dma_addr,
|
||||
unsigned long attrs);
|
||||
|
||||
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
|
||||
|
||||
void vduse_domain_destroy(struct vduse_iova_domain *domain);
|
||||
|
||||
struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
|
||||
size_t bounce_size);
|
||||
|
||||
int vduse_domain_init(void);
|
||||
|
||||
void vduse_domain_exit(void);
|
||||
|
||||
#endif /* _VDUSE_IOVA_DOMAIN_H */
|
1641
drivers/vdpa/vdpa_user/vduse_dev.c
Normal file
1641
drivers/vdpa/vdpa_user/vduse_dev.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -189,10 +189,20 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
|
||||
}
|
||||
|
||||
vp_modern_set_status(mdev, status);
|
||||
}
|
||||
|
||||
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
|
||||
(s & VIRTIO_CONFIG_S_DRIVER_OK))
|
||||
static int vp_vdpa_reset(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
|
||||
struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
|
||||
u8 s = vp_vdpa_get_status(vdpa);
|
||||
|
||||
vp_modern_set_status(mdev, 0);
|
||||
|
||||
if (s & VIRTIO_CONFIG_S_DRIVER_OK)
|
||||
vp_vdpa_free_irq(vp_vdpa);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
|
||||
@ -398,6 +408,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = {
|
||||
.set_features = vp_vdpa_set_features,
|
||||
.get_status = vp_vdpa_get_status,
|
||||
.set_status = vp_vdpa_set_status,
|
||||
.reset = vp_vdpa_reset,
|
||||
.get_vq_num_max = vp_vdpa_get_vq_num_max,
|
||||
.get_vq_state = vp_vdpa_get_vq_state,
|
||||
.get_vq_notification = vp_vdpa_get_vq_notification,
|
||||
@ -435,7 +446,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
return ret;
|
||||
|
||||
vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
|
||||
dev, &vp_vdpa_ops, NULL);
|
||||
dev, &vp_vdpa_ops, NULL, false);
|
||||
if (IS_ERR(vp_vdpa)) {
|
||||
dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
|
||||
return PTR_ERR(vp_vdpa);
|
||||
|
@ -36,19 +36,21 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_map_free);
|
||||
|
||||
/**
|
||||
* vhost_iotlb_add_range - add a new range to vhost IOTLB
|
||||
* vhost_iotlb_add_range_ctx - add a new range to vhost IOTLB
|
||||
* @iotlb: the IOTLB
|
||||
* @start: start of the IOVA range
|
||||
* @last: last of IOVA range
|
||||
* @addr: the address that is mapped to @start
|
||||
* @perm: access permission of this range
|
||||
* @opaque: the opaque pointer for the new mapping
|
||||
*
|
||||
* Returns an error if last is smaller than start or memory allocation
|
||||
* fails
|
||||
*/
|
||||
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
|
||||
u64 start, u64 last,
|
||||
u64 addr, unsigned int perm)
|
||||
int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
|
||||
u64 start, u64 last,
|
||||
u64 addr, unsigned int perm,
|
||||
void *opaque)
|
||||
{
|
||||
struct vhost_iotlb_map *map;
|
||||
|
||||
@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
|
||||
map->last = last;
|
||||
map->addr = addr;
|
||||
map->perm = perm;
|
||||
map->opaque = opaque;
|
||||
|
||||
iotlb->nmaps++;
|
||||
vhost_iotlb_itree_insert(map, &iotlb->root);
|
||||
@ -80,6 +83,15 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range_ctx);
|
||||
|
||||
/**
 * vhost_iotlb_add_range - add a new range to vhost IOTLB
 * @iotlb: the IOTLB
 * @start: start of the IOVA range
 * @last: last of IOVA range
 * @addr: the address that is mapped to @start
 * @perm: access permission of this range
 *
 * Same as vhost_iotlb_add_range_ctx() with a NULL opaque pointer.
 *
 * Returns the result of vhost_iotlb_add_range_ctx().
 */
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
			  u64 start, u64 last,
			  u64 addr, unsigned int perm)
{
	void *no_opaque = NULL;

	return vhost_iotlb_add_range_ctx(iotlb, start, last, addr, perm,
					 no_opaque);
}
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
|
||||
|
||||
/**
|
||||
|
@ -1,24 +1,12 @@
|
||||
// SPDX-License-Identifier: GPL-2.0+
|
||||
/*******************************************************************************
|
||||
* Vhost kernel TCM fabric driver for virtio SCSI initiators
|
||||
*
|
||||
* (C) Copyright 2010-2013 Datera, Inc.
|
||||
* (C) Copyright 2010-2012 IBM Corp.
|
||||
*
|
||||
* Licensed to the Linux Foundation under the General Public License (GPL) version 2.
|
||||
*
|
||||
* Authors: Nicholas A. Bellinger <nab@daterainc.com>
|
||||
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <linux/module.h>
|
||||
|
@ -116,12 +116,13 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
|
||||
irq_bypass_unregister_producer(&vq->call_ctx.producer);
|
||||
}
|
||||
|
||||
static void vhost_vdpa_reset(struct vhost_vdpa *v)
|
||||
static int vhost_vdpa_reset(struct vhost_vdpa *v)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
|
||||
vdpa_reset(vdpa);
|
||||
v->in_batch = 0;
|
||||
|
||||
return vdpa_reset(vdpa);
|
||||
}
|
||||
|
||||
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
|
||||
@ -157,7 +158,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
u8 status, status_old;
|
||||
int nvqs = v->nvqs;
|
||||
int ret, nvqs = v->nvqs;
|
||||
u16 i;
|
||||
|
||||
if (copy_from_user(&status, statusp, sizeof(status)))
|
||||
@ -172,7 +173,12 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
|
||||
if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
ops->set_status(vdpa, status);
|
||||
if (status == 0) {
|
||||
ret = ops->reset(vdpa);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else
|
||||
ops->set_status(vdpa, status);
|
||||
|
||||
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
|
||||
for (i = 0; i < nvqs; i++)
|
||||
@ -498,7 +504,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
|
||||
return r;
|
||||
}
|
||||
|
||||
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
|
||||
static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
struct vhost_iotlb *iotlb = dev->iotlb;
|
||||
@ -507,19 +513,44 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
|
||||
unsigned long pfn, pinned;
|
||||
|
||||
while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
|
||||
pinned = map->size >> PAGE_SHIFT;
|
||||
for (pfn = map->addr >> PAGE_SHIFT;
|
||||
pinned = PFN_DOWN(map->size);
|
||||
for (pfn = PFN_DOWN(map->addr);
|
||||
pinned > 0; pfn++, pinned--) {
|
||||
page = pfn_to_page(pfn);
|
||||
if (map->perm & VHOST_ACCESS_WO)
|
||||
set_page_dirty_lock(page);
|
||||
unpin_user_page(page);
|
||||
}
|
||||
atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
|
||||
atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
|
||||
vhost_iotlb_map_free(iotlb, map);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Remove every IOTLB entry overlapping [start, last] for a device that
 * uses virtual addresses. Each entry carries a struct vdpa_map_file in
 * its opaque pointer; the file reference is dropped and the descriptor
 * freed before the entry itself is released.
 */
static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_iotlb *iotlb = v->vdev.iotlb;
	struct vhost_iotlb_map *map;

	for (;;) {
		struct vdpa_map_file *map_file;

		map = vhost_iotlb_itree_first(iotlb, start, last);
		if (!map)
			break;

		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(iotlb, map);
	}
}
|
||||
|
||||
/*
 * Remove the IOTLB entries overlapping [start, last], dispatching on
 * whether the device works with virtual addresses (file-backed mappings)
 * or physical addresses (pinned pages).
 */
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vdpa_device *vdpa = v->vdpa;

	/*
	 * Both helpers return void; call them as plain statements rather
	 * than "return <void expression>", which ISO C does not allow in a
	 * function returning void.
	 */
	if (vdpa->use_va)
		vhost_vdpa_va_unmap(v, start, last);
	else
		vhost_vdpa_pa_unmap(v, start, last);
}
|
||||
|
||||
static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
@ -551,21 +582,21 @@ static int perm_to_iommu_flags(u32 perm)
|
||||
return flags | IOMMU_CACHE;
|
||||
}
|
||||
|
||||
static int vhost_vdpa_map(struct vhost_vdpa *v,
|
||||
u64 iova, u64 size, u64 pa, u32 perm)
|
||||
static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
|
||||
u64 size, u64 pa, u32 perm, void *opaque)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
int r = 0;
|
||||
|
||||
r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
|
||||
pa, perm);
|
||||
r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
|
||||
pa, perm, opaque);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (ops->dma_map) {
|
||||
r = ops->dma_map(vdpa, iova, size, pa, perm);
|
||||
r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
|
||||
} else if (ops->set_map) {
|
||||
if (!v->in_batch)
|
||||
r = ops->set_map(vdpa, dev->iotlb);
|
||||
@ -573,13 +604,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
|
||||
r = iommu_map(v->domain, iova, pa, size,
|
||||
perm_to_iommu_flags(perm));
|
||||
}
|
||||
|
||||
if (r)
|
||||
if (r) {
|
||||
vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
|
||||
else
|
||||
atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
|
||||
return r;
|
||||
}
|
||||
|
||||
return r;
|
||||
if (!vdpa->use_va)
|
||||
atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
|
||||
@ -600,38 +633,78 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
|
||||
}
|
||||
}
|
||||
|
||||
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
|
||||
struct vhost_iotlb_msg *msg)
|
||||
/*
 * vhost_vdpa_va_map - map a user virtual address range for a VA device
 * @v: vhost-vdpa instance
 * @iova: start of the IOVA range
 * @size: length of the range in bytes
 * @uaddr: user virtual address that @iova maps to
 * @perm: access permission of the mapping
 *
 * Walks the VMAs covering the range with the mmap lock held for read.
 * For every shared, file-backed VMA that is neither VM_IO nor VM_PFNMAP,
 * a struct vdpa_map_file (file reference plus offset into the file) is
 * attached as the opaque pointer of the new mapping. Other VMAs are
 * skipped without creating a mapping.
 *
 * Returns 0 on success (including when every VMA was skipped), -EINVAL
 * if no VMA is found, -ENOMEM on allocation failure, or the error from
 * vhost_vdpa_map(). On error the part already mapped is unmapped again.
 */
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	/*
	 * Must start at 0: if every iteration takes the "goto next" path,
	 * nothing else assigns ret before it is tested and returned.
	 */
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		/* Only shared, file-backed, non-IO/PFNMAP VMAs are mapped. */
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
		      !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			/* The mapping was not created, so undo the ref here. */
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}
|
||||
|
||||
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
|
||||
u64 iova, u64 size, u64 uaddr, u32 perm)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
struct vhost_iotlb *iotlb = dev->iotlb;
|
||||
struct page **page_list;
|
||||
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
|
||||
unsigned int gup_flags = FOLL_LONGTERM;
|
||||
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
|
||||
unsigned long lock_limit, sz2pin, nchunks, i;
|
||||
u64 iova = msg->iova;
|
||||
u64 start = iova;
|
||||
long pinned;
|
||||
int ret = 0;
|
||||
|
||||
if (msg->iova < v->range.first || !msg->size ||
|
||||
msg->iova > U64_MAX - msg->size + 1 ||
|
||||
msg->iova + msg->size - 1 > v->range.last)
|
||||
return -EINVAL;
|
||||
|
||||
if (vhost_iotlb_itree_first(iotlb, msg->iova,
|
||||
msg->iova + msg->size - 1))
|
||||
return -EEXIST;
|
||||
|
||||
/* Limit the use of memory for bookkeeping */
|
||||
page_list = (struct page **) __get_free_page(GFP_KERNEL);
|
||||
if (!page_list)
|
||||
return -ENOMEM;
|
||||
|
||||
if (msg->perm & VHOST_ACCESS_WO)
|
||||
if (perm & VHOST_ACCESS_WO)
|
||||
gup_flags |= FOLL_WRITE;
|
||||
|
||||
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
|
||||
npages = PFN_UP(size + (iova & ~PAGE_MASK));
|
||||
if (!npages) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
@ -639,13 +712,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
|
||||
|
||||
mmap_read_lock(dev->mm);
|
||||
|
||||
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||
lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
|
||||
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
|
||||
ret = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
cur_base = msg->uaddr & PAGE_MASK;
|
||||
cur_base = uaddr & PAGE_MASK;
|
||||
iova &= PAGE_MASK;
|
||||
nchunks = 0;
|
||||
|
||||
@ -673,10 +746,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
|
||||
|
||||
if (last_pfn && (this_pfn != last_pfn + 1)) {
|
||||
/* Pin a contiguous chunk of memory */
|
||||
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
|
||||
csize = PFN_PHYS(last_pfn - map_pfn + 1);
|
||||
ret = vhost_vdpa_map(v, iova, csize,
|
||||
map_pfn << PAGE_SHIFT,
|
||||
msg->perm);
|
||||
PFN_PHYS(map_pfn),
|
||||
perm, NULL);
|
||||
if (ret) {
|
||||
/*
|
||||
* Unpin the pages that are left unmapped
|
||||
@ -699,13 +772,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
|
||||
last_pfn = this_pfn;
|
||||
}
|
||||
|
||||
cur_base += pinned << PAGE_SHIFT;
|
||||
cur_base += PFN_PHYS(pinned);
|
||||
npages -= pinned;
|
||||
}
|
||||
|
||||
/* Pin the rest chunk */
|
||||
ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
|
||||
map_pfn << PAGE_SHIFT, msg->perm);
|
||||
ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1),
|
||||
PFN_PHYS(map_pfn), perm, NULL);
|
||||
out:
|
||||
if (ret) {
|
||||
if (nchunks) {
|
||||
@ -724,13 +797,38 @@ out:
|
||||
for (pfn = map_pfn; pfn <= last_pfn; pfn++)
|
||||
unpin_user_page(pfn_to_page(pfn));
|
||||
}
|
||||
vhost_vdpa_unmap(v, msg->iova, msg->size);
|
||||
vhost_vdpa_unmap(v, start, size);
|
||||
}
|
||||
unlock:
|
||||
mmap_read_unlock(dev->mm);
|
||||
free:
|
||||
free_page((unsigned long)page_list);
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
|
||||
struct vhost_iotlb_msg *msg)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
struct vhost_iotlb *iotlb = dev->iotlb;
|
||||
|
||||
if (msg->iova < v->range.first || !msg->size ||
|
||||
msg->iova > U64_MAX - msg->size + 1 ||
|
||||
msg->iova + msg->size - 1 > v->range.last)
|
||||
return -EINVAL;
|
||||
|
||||
if (vhost_iotlb_itree_first(iotlb, msg->iova,
|
||||
msg->iova + msg->size - 1))
|
||||
return -EEXIST;
|
||||
|
||||
if (vdpa->use_va)
|
||||
return vhost_vdpa_va_map(v, msg->iova, msg->size,
|
||||
msg->uaddr, msg->perm);
|
||||
|
||||
return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
|
||||
msg->perm);
|
||||
}
|
||||
|
||||
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
|
||||
@ -860,7 +958,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
|
||||
return -EBUSY;
|
||||
|
||||
nvqs = v->nvqs;
|
||||
vhost_vdpa_reset(v);
|
||||
r = vhost_vdpa_reset(v);
|
||||
if (r)
|
||||
goto err;
|
||||
|
||||
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
|
||||
if (!vqs) {
|
||||
@ -945,7 +1045,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
|
||||
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
|
||||
notify.addr >> PAGE_SHIFT, PAGE_SIZE,
|
||||
PFN_DOWN(notify.addr), PAGE_SIZE,
|
||||
vma->vm_page_prot))
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
|
@ -114,7 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
|
||||
size_t nbytes;
|
||||
size_t iov_len, payload_len;
|
||||
int head;
|
||||
bool restore_flag = false;
|
||||
u32 flags_to_restore = 0;
|
||||
|
||||
spin_lock_bh(&vsock->send_pkt_list_lock);
|
||||
if (list_empty(&vsock->send_pkt_list)) {
|
||||
@ -178,16 +178,21 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
|
||||
* small rx buffers, headers of packets in rx queue are
|
||||
* created dynamically and are initialized with header
|
||||
* of current packet(except length). But in case of
|
||||
* SOCK_SEQPACKET, we also must clear record delimeter
|
||||
* bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
|
||||
* packet with delimeter(which marks end of record),
|
||||
* there will be sequence of packets with delimeter
|
||||
* bit set. After initialized header will be copied to
|
||||
* rx buffer, this bit will be restored.
|
||||
* SOCK_SEQPACKET, we also must clear message delimiter
|
||||
* bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
|
||||
* (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
|
||||
* there will be sequence of packets with these
|
||||
* bits set. After initialized header will be copied to
|
||||
* rx buffer, these required bits will be restored.
|
||||
*/
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
|
||||
pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
|
||||
restore_flag = true;
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
|
||||
pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
|
||||
flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
|
||||
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
|
||||
pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
|
||||
flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -224,8 +229,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
|
||||
* to send it with the next available buffer.
|
||||
*/
|
||||
if (pkt->off < pkt->len) {
|
||||
if (restore_flag)
|
||||
pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
|
||||
pkt->hdr.flags |= cpu_to_le32(flags_to_restore);
|
||||
|
||||
/* We are queueing the same virtio_vsock_pkt to handle
|
||||
* the remaining bytes, and we want to deliver it
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/of.h>
|
||||
#include <uapi/linux/virtio_ids.h>
|
||||
|
||||
/* Unique numbering for virtio devices. */
|
||||
@ -292,6 +293,8 @@ static void virtio_dev_remove(struct device *_d)
|
||||
|
||||
/* Acknowledge the device's existence again. */
|
||||
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
|
||||
|
||||
of_node_put(dev->dev.of_node);
|
||||
}
|
||||
|
||||
static struct bus_type virtio_bus = {
|
||||
@ -318,6 +321,43 @@ void unregister_virtio_driver(struct virtio_driver *driver)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_virtio_driver);
|
||||
|
||||
static int virtio_device_of_init(struct virtio_device *dev)
|
||||
{
|
||||
struct device_node *np, *pnode = dev_of_node(dev->dev.parent);
|
||||
char compat[] = "virtio,deviceXXXXXXXX";
|
||||
int ret, count;
|
||||
|
||||
if (!pnode)
|
||||
return 0;
|
||||
|
||||
count = of_get_available_child_count(pnode);
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
/* There can be only 1 child node */
|
||||
if (WARN_ON(count > 1))
|
||||
return -EINVAL;
|
||||
|
||||
np = of_get_next_available_child(pnode, NULL);
|
||||
if (WARN_ON(!np))
|
||||
return -ENODEV;
|
||||
|
||||
ret = snprintf(compat, sizeof(compat), "virtio,device%x", dev->id.device);
|
||||
BUG_ON(ret >= sizeof(compat));
|
||||
|
||||
if (!of_device_is_compatible(np, compat)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev->dev.of_node = np;
|
||||
return 0;
|
||||
|
||||
out:
|
||||
of_node_put(np);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* register_virtio_device - register virtio device
|
||||
* @dev : virtio device to be registered
|
||||
@ -342,6 +382,10 @@ int register_virtio_device(struct virtio_device *dev)
|
||||
dev->index = err;
|
||||
dev_set_name(&dev->dev, "virtio%u", dev->index);
|
||||
|
||||
err = virtio_device_of_init(dev);
|
||||
if (err)
|
||||
goto out_ida_remove;
|
||||
|
||||
spin_lock_init(&dev->config_lock);
|
||||
dev->config_enabled = false;
|
||||
dev->config_change_pending = false;
|
||||
@ -362,10 +406,16 @@ int register_virtio_device(struct virtio_device *dev)
|
||||
*/
|
||||
err = device_add(&dev->dev);
|
||||
if (err)
|
||||
ida_simple_remove(&virtio_index_ida, dev->index);
|
||||
goto out_of_node_put;
|
||||
|
||||
return 0;
|
||||
|
||||
out_of_node_put:
|
||||
of_node_put(dev->dev.of_node);
|
||||
out_ida_remove:
|
||||
ida_simple_remove(&virtio_index_ida, dev->index);
|
||||
out:
|
||||
if (err)
|
||||
virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
|
||||
virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_virtio_device);
|
||||
|
@ -531,8 +531,8 @@ static int init_vqs(struct virtio_balloon *vb)
|
||||
callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
|
||||
}
|
||||
|
||||
err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
|
||||
vqs, callbacks, names, NULL, NULL);
|
||||
err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
|
||||
callbacks, names, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
@ -1150,6 +1150,12 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
|
||||
return new_fd;
|
||||
}
|
||||
|
||||
int receive_fd(struct file *file, unsigned int o_flags)
|
||||
{
|
||||
return __receive_fd(file, NULL, o_flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(receive_fd);
|
||||
|
||||
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
|
||||
{
|
||||
int err = -EBADF;
|
||||
|
@ -94,6 +94,9 @@ extern void fd_install(unsigned int fd, struct file *file);
|
||||
|
||||
extern int __receive_fd(struct file *file, int __user *ufd,
|
||||
unsigned int o_flags);
|
||||
|
||||
extern int receive_fd(struct file *file, unsigned int o_flags);
|
||||
|
||||
static inline int receive_fd_user(struct file *file, int __user *ufd,
|
||||
unsigned int o_flags)
|
||||
{
|
||||
@ -101,10 +104,6 @@ static inline int receive_fd_user(struct file *file, int __user *ufd,
|
||||
return -EFAULT;
|
||||
return __receive_fd(file, ufd, o_flags);
|
||||
}
|
||||
static inline int receive_fd(struct file *file, unsigned int o_flags)
|
||||
{
|
||||
return __receive_fd(file, NULL, o_flags);
|
||||
}
|
||||
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
|
||||
|
||||
extern void flush_delayed_fput(void);
|
||||
|
@ -43,17 +43,17 @@ struct vdpa_vq_state_split {
|
||||
* @last_used_idx: used index
|
||||
*/
|
||||
struct vdpa_vq_state_packed {
|
||||
u16 last_avail_counter:1;
|
||||
u16 last_avail_idx:15;
|
||||
u16 last_used_counter:1;
|
||||
u16 last_used_idx:15;
|
||||
u16 last_avail_counter:1;
|
||||
u16 last_avail_idx:15;
|
||||
u16 last_used_counter:1;
|
||||
u16 last_used_idx:15;
|
||||
};
|
||||
|
||||
struct vdpa_vq_state {
|
||||
union {
|
||||
struct vdpa_vq_state_split split;
|
||||
struct vdpa_vq_state_packed packed;
|
||||
};
|
||||
union {
|
||||
struct vdpa_vq_state_split split;
|
||||
struct vdpa_vq_state_packed packed;
|
||||
};
|
||||
};
|
||||
|
||||
struct vdpa_mgmt_dev;
|
||||
@ -65,6 +65,7 @@ struct vdpa_mgmt_dev;
|
||||
* @config: the configuration ops for this device.
|
||||
* @index: device index
|
||||
* @features_valid: were features initialized? for legacy guests
|
||||
* @use_va: indicate whether virtual address must be used by this device
|
||||
* @nvqs: maximum number of supported virtqueues
|
||||
* @mdev: management device pointer; caller must setup when registering device as part
|
||||
* of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
|
||||
@ -75,6 +76,7 @@ struct vdpa_device {
|
||||
const struct vdpa_config_ops *config;
|
||||
unsigned int index;
|
||||
bool features_valid;
|
||||
bool use_va;
|
||||
int nvqs;
|
||||
struct vdpa_mgmt_dev *mdev;
|
||||
};
|
||||
@ -89,6 +91,16 @@ struct vdpa_iova_range {
|
||||
u64 last;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vdpa_map_file - file area for device memory mapping
|
||||
* @file: vma->vm_file for the mapping
|
||||
* @offset: mapping offset in the vm_file
|
||||
*/
|
||||
struct vdpa_map_file {
|
||||
struct file *file;
|
||||
u64 offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vdpa_config_ops - operations for configuring a vDPA device.
|
||||
* Note: vDPA device drivers are required to implement all of the
|
||||
@ -131,7 +143,7 @@ struct vdpa_iova_range {
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @state: pointer to returned state (last_avail_idx)
|
||||
* @get_vq_notification: Get the notification area for a virtqueue
|
||||
* @get_vq_notification: Get the notification area for a virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* Returns the notification area
|
||||
@ -171,6 +183,9 @@ struct vdpa_iova_range {
|
||||
* @set_status: Set the device status
|
||||
* @vdev: vdpa device
|
||||
* @status: virtio device status
|
||||
* @reset: Reset device
|
||||
* @vdev: vdpa device
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @get_config_size: Get the size of the configuration space
|
||||
* @vdev: vdpa device
|
||||
* Returns size_t: configuration size
|
||||
@ -255,6 +270,7 @@ struct vdpa_config_ops {
|
||||
u32 (*get_vendor_id)(struct vdpa_device *vdev);
|
||||
u8 (*get_status)(struct vdpa_device *vdev);
|
||||
void (*set_status)(struct vdpa_device *vdev, u8 status);
|
||||
int (*reset)(struct vdpa_device *vdev);
|
||||
size_t (*get_config_size)(struct vdpa_device *vdev);
|
||||
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
|
||||
void *buf, unsigned int len);
|
||||
@ -266,7 +282,7 @@ struct vdpa_config_ops {
|
||||
/* DMA ops */
|
||||
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
|
||||
int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
|
||||
u64 pa, u32 perm);
|
||||
u64 pa, u32 perm, void *opaque);
|
||||
int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
|
||||
|
||||
/* Free device resources */
|
||||
@ -275,7 +291,8 @@ struct vdpa_config_ops {
|
||||
|
||||
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
const struct vdpa_config_ops *config,
|
||||
size_t size, const char *name);
|
||||
size_t size, const char *name,
|
||||
bool use_va);
|
||||
|
||||
/**
|
||||
* vdpa_alloc_device - allocate and initialize a vDPA device
|
||||
@ -285,15 +302,16 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
* @parent: the parent device
|
||||
* @config: the bus operations that is supported by this device
|
||||
* @name: name of the vdpa device
|
||||
* @use_va: indicate whether virtual address must be used by this device
|
||||
*
|
||||
* Return allocated data structure or ERR_PTR upon error
|
||||
*/
|
||||
#define vdpa_alloc_device(dev_struct, member, parent, config, name) \
|
||||
#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \
|
||||
container_of(__vdpa_alloc_device( \
|
||||
parent, config, \
|
||||
sizeof(dev_struct) + \
|
||||
BUILD_BUG_ON_ZERO(offsetof( \
|
||||
dev_struct, member)), name), \
|
||||
dev_struct, member)), name, use_va), \
|
||||
dev_struct, member)
|
||||
|
||||
int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
|
||||
@ -348,27 +366,27 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
|
||||
return vdev->dma_dev;
|
||||
}
|
||||
|
||||
static inline void vdpa_reset(struct vdpa_device *vdev)
|
||||
static inline int vdpa_reset(struct vdpa_device *vdev)
|
||||
{
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
|
||||
vdev->features_valid = false;
|
||||
ops->set_status(vdev, 0);
|
||||
return ops->reset(vdev);
|
||||
}
|
||||
|
||||
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
|
||||
{
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
|
||||
vdev->features_valid = true;
|
||||
return ops->set_features(vdev, features);
|
||||
return ops->set_features(vdev, features);
|
||||
}
|
||||
|
||||
|
||||
static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
|
||||
void *buf, unsigned int len)
|
||||
static inline void vdpa_get_config(struct vdpa_device *vdev,
|
||||
unsigned int offset, void *buf,
|
||||
unsigned int len)
|
||||
{
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
|
||||
/*
|
||||
* Config accesses aren't supposed to trigger before features are set.
|
||||
|
@ -17,6 +17,7 @@ struct vhost_iotlb_map {
|
||||
u32 perm;
|
||||
u32 flags_padding;
|
||||
u64 __subtree_last;
|
||||
void *opaque;
|
||||
};
|
||||
|
||||
#define VHOST_IOTLB_FLAG_RETIRE 0x1
|
||||
@ -29,6 +30,8 @@ struct vhost_iotlb {
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, u64 start, u64 last,
|
||||
u64 addr, unsigned int perm, void *opaque);
|
||||
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
|
||||
u64 addr, unsigned int perm);
|
||||
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
|
||||
|
306
include/uapi/linux/vduse.h
Normal file
306
include/uapi/linux/vduse.h
Normal file
@ -0,0 +1,306 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
#ifndef _UAPI_VDUSE_H_
|
||||
#define _UAPI_VDUSE_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define VDUSE_BASE 0x81
|
||||
|
||||
/* The ioctls for control device (/dev/vduse/control) */
|
||||
|
||||
#define VDUSE_API_VERSION 0
|
||||
|
||||
/*
|
||||
* Get the version of the VDUSE API that the kernel supports (VDUSE_API_VERSION).
|
||||
* This is used for future extension.
|
||||
*/
|
||||
#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64)
|
||||
|
||||
/* Set the version of the VDUSE API that userspace supports. */
|
||||
#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64)
|
||||
|
||||
/**
|
||||
* struct vduse_dev_config - basic configuration of a VDUSE device
|
||||
* @name: VDUSE device name, needs to be NUL terminated
|
||||
* @vendor_id: virtio vendor id
|
||||
* @device_id: virtio device id
|
||||
* @features: virtio features
|
||||
* @vq_num: the number of virtqueues
|
||||
* @vq_align: the allocation alignment of virtqueue's metadata
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
* @config_size: the size of the configuration space
|
||||
* @config: the buffer of the configuration space
|
||||
*
|
||||
* Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
|
||||
*/
|
||||
struct vduse_dev_config {
|
||||
#define VDUSE_NAME_MAX 256
|
||||
char name[VDUSE_NAME_MAX];
|
||||
__u32 vendor_id;
|
||||
__u32 device_id;
|
||||
__u64 features;
|
||||
__u32 vq_num;
|
||||
__u32 vq_align;
|
||||
__u32 reserved[13];
|
||||
__u32 config_size;
|
||||
__u8 config[];
|
||||
};
|
||||
|
||||
/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */
|
||||
#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
|
||||
|
||||
/*
|
||||
* Destroy a VDUSE device. Make sure there are no more references
|
||||
* to the char device (/dev/vduse/$NAME).
|
||||
*/
|
||||
#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
|
||||
|
||||
/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
|
||||
|
||||
/**
|
||||
* struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last]
|
||||
* @offset: the mmap offset on returned file descriptor
|
||||
* @start: start of the IOVA region
|
||||
* @last: last of the IOVA region
|
||||
* @perm: access permission of the IOVA region
|
||||
*
|
||||
* Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region.
|
||||
*/
|
||||
struct vduse_iotlb_entry {
|
||||
__u64 offset;
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
#define VDUSE_ACCESS_RO 0x1
|
||||
#define VDUSE_ACCESS_WO 0x2
|
||||
#define VDUSE_ACCESS_RW 0x3
|
||||
__u8 perm;
|
||||
};
|
||||
|
||||
/*
|
||||
* Find the first IOVA region that overlaps with the range [start, last]
|
||||
* and return the corresponding file descriptor. A return value of -EINVAL means the
|
||||
* IOVA region doesn't exist. Caller should set start and last fields.
|
||||
*/
|
||||
#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry)
|
||||
|
||||
/*
|
||||
* Get the negotiated virtio features. It's a subset of the features in
|
||||
* struct vduse_dev_config which can be accepted by the virtio driver. It's
|
||||
* only valid after FEATURES_OK status bit is set.
|
||||
*/
|
||||
#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)
|
||||
|
||||
/**
|
||||
* struct vduse_config_data - data used to update configuration space
|
||||
* @offset: the offset from the beginning of configuration space
|
||||
* @length: the length to write to configuration space
|
||||
* @buffer: the buffer used to write from
|
||||
*
|
||||
* Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
|
||||
* configuration space.
|
||||
*/
|
||||
struct vduse_config_data {
|
||||
__u32 offset;
|
||||
__u32 length;
|
||||
__u8 buffer[];
|
||||
};
|
||||
|
||||
/* Set device configuration space */
|
||||
#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
|
||||
|
||||
/*
|
||||
* Inject a config interrupt. It's usually used to notify the virtio driver
|
||||
* that device configuration space has changed.
|
||||
*/
|
||||
#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)
|
||||
|
||||
/**
|
||||
* struct vduse_vq_config - basic configuration of a virtqueue
|
||||
* @index: virtqueue index
|
||||
* @max_size: the max size of virtqueue
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
*
|
||||
* Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
|
||||
*/
|
||||
struct vduse_vq_config {
|
||||
__u32 index;
|
||||
__u16 max_size;
|
||||
__u16 reserved[13];
|
||||
};
|
||||
|
||||
/*
|
||||
* Set up the specified virtqueue. Make sure all virtqueues have been
|
||||
* configured before the device is attached to vDPA bus.
|
||||
*/
|
||||
#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config)
|
||||
|
||||
/**
|
||||
* struct vduse_vq_state_split - split virtqueue state
|
||||
* @avail_index: available index
|
||||
*/
|
||||
struct vduse_vq_state_split {
|
||||
__u16 avail_index;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_vq_state_packed - packed virtqueue state
|
||||
* @last_avail_counter: last driver ring wrap counter observed by device
|
||||
* @last_avail_idx: device available index
|
||||
* @last_used_counter: device ring wrap counter
|
||||
* @last_used_idx: used index
|
||||
*/
|
||||
struct vduse_vq_state_packed {
|
||||
__u16 last_avail_counter;
|
||||
__u16 last_avail_idx;
|
||||
__u16 last_used_counter;
|
||||
__u16 last_used_idx;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_vq_info - information of a virtqueue
|
||||
* @index: virtqueue index
|
||||
* @num: the size of virtqueue
|
||||
* @desc_addr: address of desc area
|
||||
* @driver_addr: address of driver area
|
||||
* @device_addr: address of device area
|
||||
* @split: split virtqueue state
|
||||
* @packed: packed virtqueue state
|
||||
* @ready: ready status of virtqueue
|
||||
*
|
||||
* Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information.
|
||||
*/
|
||||
struct vduse_vq_info {
|
||||
__u32 index;
|
||||
__u32 num;
|
||||
__u64 desc_addr;
|
||||
__u64 driver_addr;
|
||||
__u64 device_addr;
|
||||
union {
|
||||
struct vduse_vq_state_split split;
|
||||
struct vduse_vq_state_packed packed;
|
||||
};
|
||||
__u8 ready;
|
||||
};
|
||||
|
||||
/* Get the specified virtqueue's information. Caller should set index field. */
|
||||
#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info)
|
||||
|
||||
/**
|
||||
* struct vduse_vq_eventfd - eventfd configuration for a virtqueue
|
||||
* @index: virtqueue index
|
||||
* @fd: eventfd, -1 means de-assigning the eventfd
|
||||
*
|
||||
* Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd.
|
||||
*/
|
||||
struct vduse_vq_eventfd {
|
||||
__u32 index;
|
||||
#define VDUSE_EVENTFD_DEASSIGN -1
|
||||
int fd;
|
||||
};
|
||||
|
||||
/*
|
||||
* Set up the kick eventfd for the specified virtqueue. The kick eventfd is used
|
||||
* by the VDUSE kernel module to notify userspace to consume the avail vring.
|
||||
*/
|
||||
#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd)
|
||||
|
||||
/*
|
||||
* Inject an interrupt for a specific virtqueue. It's used to notify the virtio driver
|
||||
* to consume the used vring.
|
||||
*/
|
||||
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
|
||||
|
||||
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
|
||||
|
||||
/**
|
||||
* enum vduse_req_type - request type
|
||||
* @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace
|
||||
* @VDUSE_SET_STATUS: set the device status
|
||||
* @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
|
||||
* specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
|
||||
*/
|
||||
enum vduse_req_type {
|
||||
VDUSE_GET_VQ_STATE,
|
||||
VDUSE_SET_STATUS,
|
||||
VDUSE_UPDATE_IOTLB,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_vq_state - virtqueue state
|
||||
* @index: virtqueue index
|
||||
* @split: split virtqueue state
|
||||
* @packed: packed virtqueue state
|
||||
*/
|
||||
struct vduse_vq_state {
|
||||
__u32 index;
|
||||
union {
|
||||
struct vduse_vq_state_split split;
|
||||
struct vduse_vq_state_packed packed;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_dev_status - device status
|
||||
* @status: device status
|
||||
*/
|
||||
struct vduse_dev_status {
|
||||
__u8 status;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_iova_range - IOVA range [start, last]
|
||||
* @start: start of the IOVA range
|
||||
* @last: last of the IOVA range
|
||||
*/
|
||||
struct vduse_iova_range {
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_dev_request - control request
|
||||
* @type: request type
|
||||
* @request_id: request id
|
||||
* @reserved: for future use
|
||||
* @vq_state: virtqueue state, only index field is available
|
||||
* @s: device status
|
||||
* @iova: IOVA range for updating
|
||||
* @padding: padding
|
||||
*
|
||||
* Structure used by read(2) on /dev/vduse/$NAME.
|
||||
*/
|
||||
struct vduse_dev_request {
|
||||
__u32 type;
|
||||
__u32 request_id;
|
||||
__u32 reserved[4];
|
||||
union {
|
||||
struct vduse_vq_state vq_state;
|
||||
struct vduse_dev_status s;
|
||||
struct vduse_iova_range iova;
|
||||
__u32 padding[32];
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_dev_response - response to control request
|
||||
* @request_id: corresponding request id
|
||||
* @result: the result of request
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
* @vq_state: virtqueue state
|
||||
* @padding: padding
|
||||
*
|
||||
* Structure used by write(2) on /dev/vduse/$NAME.
|
||||
*/
|
||||
struct vduse_dev_response {
|
||||
__u32 request_id;
|
||||
#define VDUSE_REQ_RESULT_OK 0x00
|
||||
#define VDUSE_REQ_RESULT_FAILED 0x01
|
||||
__u32 result;
|
||||
__u32 reserved[4];
|
||||
union {
|
||||
struct vduse_vq_state vq_state;
|
||||
__u32 padding[32];
|
||||
};
|
||||
};
|
||||
|
||||
#endif /* _UAPI_VDUSE_H_ */
|
@ -54,9 +54,18 @@
|
||||
#define VIRTIO_ID_SOUND 25 /* virtio sound */
|
||||
#define VIRTIO_ID_FS 26 /* virtio filesystem */
|
||||
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
|
||||
#define VIRTIO_ID_RPMB 28 /* virtio rpmb */
|
||||
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
|
||||
#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */
|
||||
#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */
|
||||
#define VIRTIO_ID_SCMI 32 /* virtio SCMI */
|
||||
#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/
|
||||
#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */
|
||||
#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */
|
||||
#define VIRTIO_ID_CAN 36 /* virtio can */
|
||||
#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */
|
||||
#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */
|
||||
#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */
|
||||
#define VIRTIO_ID_BT 40 /* virtio bluetooth */
|
||||
#define VIRTIO_ID_GPIO 41 /* virtio gpio */
|
||||
|
||||
|
@ -97,7 +97,8 @@ enum virtio_vsock_shutdown {
|
||||
|
||||
/* VIRTIO_VSOCK_OP_RW flags values */
|
||||
enum virtio_vsock_rw {
|
||||
VIRTIO_VSOCK_SEQ_EOR = 1,
|
||||
VIRTIO_VSOCK_SEQ_EOM = 1,
|
||||
VIRTIO_VSOCK_SEQ_EOR = 2,
|
||||
};
|
||||
|
||||
#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
|
||||
|
@ -2014,7 +2014,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
|
||||
{
|
||||
const struct vsock_transport *transport;
|
||||
struct vsock_sock *vsk;
|
||||
ssize_t record_len;
|
||||
ssize_t msg_len;
|
||||
long timeout;
|
||||
int err = 0;
|
||||
DEFINE_WAIT(wait);
|
||||
@ -2028,9 +2028,9 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
|
||||
if (err <= 0)
|
||||
goto out;
|
||||
|
||||
record_len = transport->seqpacket_dequeue(vsk, msg, flags);
|
||||
msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
|
||||
|
||||
if (record_len < 0) {
|
||||
if (msg_len < 0) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -2044,14 +2044,14 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
|
||||
* packet.
|
||||
*/
|
||||
if (flags & MSG_TRUNC)
|
||||
err = record_len;
|
||||
err = msg_len;
|
||||
else
|
||||
err = len - msg_data_left(msg);
|
||||
|
||||
/* Always set MSG_TRUNC if real length of packet is
|
||||
* bigger than user's buffer.
|
||||
*/
|
||||
if (record_len > len)
|
||||
if (msg_len > len)
|
||||
msg->msg_flags |= MSG_TRUNC;
|
||||
}
|
||||
|
||||
|
@ -76,8 +76,12 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
|
||||
goto out;
|
||||
|
||||
if (msg_data_left(info->msg) == 0 &&
|
||||
info->type == VIRTIO_VSOCK_TYPE_SEQPACKET)
|
||||
pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
|
||||
info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
|
||||
pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
|
||||
|
||||
if (info->msg->msg_flags & MSG_EOR)
|
||||
pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
|
||||
}
|
||||
}
|
||||
|
||||
trace_virtio_transport_alloc_pkt(src_cid, src_port,
|
||||
@ -457,9 +461,12 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
|
||||
dequeued_len += pkt_len;
|
||||
}
|
||||
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
|
||||
msg_ready = true;
|
||||
vvs->msg_count--;
|
||||
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
|
||||
msg->msg_flags |= MSG_EOR;
|
||||
}
|
||||
|
||||
virtio_transport_dec_rx_pkt(vvs, pkt);
|
||||
@ -1029,7 +1036,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
|
||||
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
|
||||
vvs->msg_count++;
|
||||
|
||||
/* Try to copy small packets into the buffer of last packet queued,
|
||||
@ -1044,12 +1051,12 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
|
||||
|
||||
/* If there is space in the last packet queued, we copy the
|
||||
* new packet in its buffer. We avoid this if the last packet
|
||||
* queued has VIRTIO_VSOCK_SEQ_EOR set, because this is
|
||||
* delimiter of SEQPACKET record, so 'pkt' is the first packet
|
||||
* of a new record.
|
||||
* queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
|
||||
* the delimiter of a SEQPACKET message, so 'pkt' is the first packet
|
||||
* of a new message.
|
||||
*/
|
||||
if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
|
||||
!(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) {
|
||||
!(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
|
||||
memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
|
||||
pkt->len);
|
||||
last_pkt->len += pkt->len;
|
||||
|
@ -282,6 +282,7 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
|
||||
}
|
||||
|
||||
#define MESSAGES_CNT 7
|
||||
#define MSG_EOR_IDX (MESSAGES_CNT / 2)
|
||||
static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
|
||||
{
|
||||
int fd;
|
||||
@ -294,7 +295,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
|
||||
|
||||
/* Send several messages, one with MSG_EOR flag */
|
||||
for (int i = 0; i < MESSAGES_CNT; i++)
|
||||
send_byte(fd, 1, 0);
|
||||
send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0);
|
||||
|
||||
control_writeln("SENDDONE");
|
||||
close(fd);
|
||||
@ -324,6 +325,11 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
|
||||
perror("message bound violated");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) {
|
||||
perror("MSG_EOR");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
Loading…
Reference in New Issue
Block a user