f615625a44
p9_client_zc_rpc()/p9_check_zc_errors() are playing fast and loose with copy_from_iter_full(). Reading from a file is done by sending a Tread request. The response consists of a fixed-sized header (including the amount of data actually read) followed by the data itself. For the zero-copy case we arrange things so that the first 11 bytes of the reply go into the fixed-sized buffer, with the rest going straight into the pages we want to read into. What makes things inconvenient is that the sglist describing what should go where has to be set up *before* the reply arrives. As a result, if the reply is an error, things get interesting. On success we get "size[4] Rread tag[2] count[4] data[count]". For an error the layout varies depending upon the protocol variant - in original 9P and 9P2000 it's "size[4] Rerror tag[2] len[2] error[len]", in 9P2000.U "size[4] Rerror tag[2] len[2] error[len] errno[4]", in 9P2000.L "size[4] Rlerror tag[2] errno[4]". The last case is nice and simple - we have an 11-byte response that fits into the fixed-sized buffer we hoped to get an Rread into. In the other two, though, we get a variable-length string spilling into the pages we'd prepared for the data to be read. Had that been in the fixed-sized buffer (which is actually 4K), we would've dealt with that the same way we handle the non-zerocopy case. However, for zerocopy it doesn't end up there, so we need to copy it from those pages. The trouble is, by the time we get around to that, the references to the pages in question are already dropped. As a result, p9_check_zc_errors() tries to get the data using copy_from_iter_full(). Unfortunately, the iov_iter it's trying to read from might *NOT* be capable of that. It is, after all, a data destination, not a data source. In particular, if it's an ITER_PIPE one, copy_from_iter_full() will simply fail. In ->zc_request() itself we do have those pages, and dealing with the problem in there would be a simple matter of memcpy_from_page() into the fixed-sized buffer.
Moreover, it isn't hard to recognize the (rare) case when such copying is needed. That way we get rid of p9_check_zc_errors() entirely - p9_check_errors() can be used instead for both the zero-copy and non-zero-copy cases. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
837 lines
21 KiB
C
837 lines
21 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* The Virtio 9p transport driver
|
|
*
|
|
* This is a block based transport driver based on the lguest block driver
|
|
* code.
|
|
*
|
|
* Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
|
|
*
|
|
* Based on virtio console driver
|
|
* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/in.h>
|
|
#include <linux/module.h>
|
|
#include <linux/net.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/un.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/inet.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/file.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/slab.h>
|
|
#include <net/9p/9p.h>
|
|
#include <linux/parser.h>
|
|
#include <net/9p/client.h>
|
|
#include <net/9p/transport.h>
|
|
#include <linux/scatterlist.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/virtio.h>
|
|
#include <linux/virtio_9p.h>
|
|
#include "trans_common.h"
|
|
|
|
/* Number of entries in each channel's scatterlist array (virtio_chan.sg). */
#define VIRTQUEUE_NUM 128

/* a single mutex to manage channel initialization and attachment */
static DEFINE_MUTEX(virtio_9p_lock);
/* Woken when zero-copy pages are unpinned; p9_get_mapped_pages() waits here. */
static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
/* Count of pages currently pinned on behalf of zero-copy requests. */
static atomic_t vp_pinned = ATOMIC_INIT(0);
|
|
|
|
/**
 * struct virtio_chan - per-instance transport information
 * @inuse: whether the channel is in use (i.e. claimed by a client)
 * @lock: protects the virtqueue, @sg and @ring_bufs_avail
 * @client: client instance currently attached to this channel
 * @vdev: virtio dev associated with this channel
 * @vq: virtio queue associated with this channel
 * @ring_bufs_avail: flag to indicate there is some space available in the
 *	ring buf
 * @vc_wq: wait queue for requests waiting for space in the ring buf
 * @p9_max_pages: maximum number of pinned pages
 * @sg: scatter gather list which is used to pack a request; filled in
 *	with @lock held
 * @chan_list: linked list of channels
 *
 * We keep all per-channel information in a structure.
 * This structure is allocated in p9_virtio_probe() and freed in
 * p9_virtio_remove().
 * A pointer to the structure is stored in the virtio device's ->priv and,
 * once a client attaches, in the client's ->trans.
 *
 */

struct virtio_chan {
	bool inuse;

	spinlock_t lock;

	struct p9_client *client;
	struct virtio_device *vdev;
	struct virtqueue *vq;
	int ring_bufs_avail;
	wait_queue_head_t *vc_wq;
	/* This is global limit. Since we don't have a global structure,
	 * will be placing it in each channel.
	 */
	unsigned long p9_max_pages;
	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[VIRTQUEUE_NUM];
	/**
	 * @tag: NUL-terminated name identifying the mount; read from the
	 * device configuration at probe time
	 */
	char *tag;

	struct list_head chan_list;
};
|
|
|
|
/*
 * All probed channels. Initialized in p9_virtio_init(); additions, removals
 * and lookups are done with virtio_9p_lock held.
 */
static struct list_head virtio_chan_list;
|
|
|
|
/* How many bytes left in this page. */
|
|
static unsigned int rest_of_page(void *data)
|
|
{
|
|
return PAGE_SIZE - offset_in_page(data);
|
|
}
|
|
|
|
/**
|
|
* p9_virtio_close - reclaim resources of a channel
|
|
* @client: client instance
|
|
*
|
|
* This reclaims a channel by freeing its resources and
|
|
* resetting its inuse flag.
|
|
*
|
|
*/
|
|
|
|
static void p9_virtio_close(struct p9_client *client)
|
|
{
|
|
struct virtio_chan *chan = client->trans;
|
|
|
|
mutex_lock(&virtio_9p_lock);
|
|
if (chan)
|
|
chan->inuse = false;
|
|
mutex_unlock(&virtio_9p_lock);
|
|
}
|
|
|
|
/**
|
|
* req_done - callback which signals activity from the server
|
|
* @vq: virtio queue activity was received on
|
|
*
|
|
* This notifies us that the server has triggered some activity
|
|
* on the virtio channel - most likely a response to request we
|
|
* sent. Figure out which requests now have responses and wake up
|
|
* those threads.
|
|
*
|
|
* Bugs: could do with some additional sanity checking, but appears to work.
|
|
*
|
|
*/
|
|
|
|
static void req_done(struct virtqueue *vq)
|
|
{
|
|
struct virtio_chan *chan = vq->vdev->priv;
|
|
unsigned int len;
|
|
struct p9_req_t *req;
|
|
bool need_wakeup = false;
|
|
unsigned long flags;
|
|
|
|
p9_debug(P9_DEBUG_TRANS, ": request done\n");
|
|
|
|
spin_lock_irqsave(&chan->lock, flags);
|
|
while ((req = virtqueue_get_buf(chan->vq, &len)) != NULL) {
|
|
if (!chan->ring_bufs_avail) {
|
|
chan->ring_bufs_avail = 1;
|
|
need_wakeup = true;
|
|
}
|
|
|
|
if (len) {
|
|
req->rc.size = len;
|
|
p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
|
|
}
|
|
}
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
/* Wakeup if anyone waiting for VirtIO ring space. */
|
|
if (need_wakeup)
|
|
wake_up(chan->vc_wq);
|
|
}
|
|
|
|
/**
|
|
* pack_sg_list - pack a scatter gather list from a linear buffer
|
|
* @sg: scatter/gather list to pack into
|
|
* @start: which segment of the sg_list to start at
|
|
* @limit: maximum segment to pack data to
|
|
* @data: data to pack into scatter/gather list
|
|
* @count: amount of data to pack into the scatter/gather list
|
|
*
|
|
* sg_lists have multiple segments of various sizes. This will pack
|
|
* arbitrary data into an existing scatter gather list, segmenting the
|
|
* data as necessary within constraints.
|
|
*
|
|
*/
|
|
|
|
static int pack_sg_list(struct scatterlist *sg, int start,
|
|
int limit, char *data, int count)
|
|
{
|
|
int s;
|
|
int index = start;
|
|
|
|
while (count) {
|
|
s = rest_of_page(data);
|
|
if (s > count)
|
|
s = count;
|
|
BUG_ON(index >= limit);
|
|
/* Make sure we don't terminate early. */
|
|
sg_unmark_end(&sg[index]);
|
|
sg_set_buf(&sg[index++], data, s);
|
|
count -= s;
|
|
data += s;
|
|
}
|
|
if (index-start)
|
|
sg_mark_end(&sg[index - 1]);
|
|
return index-start;
|
|
}
|
|
|
|
/*
 * Cancelling a request that has been handed to virtio is not supported;
 * always report that to the caller by returning 1.
 */
static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
{
	return 1;
}
|
|
|
|
/*
 * Called once a request has been cancelled: no reply will arrive for it,
 * so drop the request reference here. Always returns 0.
 */
static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req)
{
	p9_req_put(req);

	return 0;
}
|
|
|
|
/**
|
|
* pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
|
|
* this takes a list of pages.
|
|
* @sg: scatter/gather list to pack into
|
|
* @start: which segment of the sg_list to start at
|
|
* @limit: maximum number of pages in sg list.
|
|
* @pdata: a list of pages to add into sg.
|
|
* @nr_pages: number of pages to pack into the scatter/gather list
|
|
* @offs: amount of data in the beginning of first page _not_ to pack
|
|
* @count: amount of data to pack into the scatter/gather list
|
|
*/
|
|
static int
|
|
pack_sg_list_p(struct scatterlist *sg, int start, int limit,
|
|
struct page **pdata, int nr_pages, size_t offs, int count)
|
|
{
|
|
int i = 0, s;
|
|
int data_off = offs;
|
|
int index = start;
|
|
|
|
BUG_ON(nr_pages > (limit - start));
|
|
/*
|
|
* if the first page doesn't start at
|
|
* page boundary find the offset
|
|
*/
|
|
while (nr_pages) {
|
|
s = PAGE_SIZE - data_off;
|
|
if (s > count)
|
|
s = count;
|
|
BUG_ON(index >= limit);
|
|
/* Make sure we don't terminate early. */
|
|
sg_unmark_end(&sg[index]);
|
|
sg_set_page(&sg[index++], pdata[i++], s, data_off);
|
|
data_off = 0;
|
|
count -= s;
|
|
nr_pages--;
|
|
}
|
|
|
|
if (index-start)
|
|
sg_mark_end(&sg[index - 1]);
|
|
return index - start;
|
|
}
|
|
|
|
/**
|
|
* p9_virtio_request - issue a request
|
|
* @client: client instance issuing the request
|
|
* @req: request to be issued
|
|
*
|
|
*/
|
|
|
|
static int
|
|
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
|
|
{
|
|
int err;
|
|
int in, out, out_sgs, in_sgs;
|
|
unsigned long flags;
|
|
struct virtio_chan *chan = client->trans;
|
|
struct scatterlist *sgs[2];
|
|
|
|
p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
|
|
|
|
req->status = REQ_STATUS_SENT;
|
|
req_retry:
|
|
spin_lock_irqsave(&chan->lock, flags);
|
|
|
|
out_sgs = in_sgs = 0;
|
|
/* Handle out VirtIO ring buffers */
|
|
out = pack_sg_list(chan->sg, 0,
|
|
VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
|
|
if (out)
|
|
sgs[out_sgs++] = chan->sg;
|
|
|
|
in = pack_sg_list(chan->sg, out,
|
|
VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
|
|
if (in)
|
|
sgs[out_sgs + in_sgs++] = chan->sg + out;
|
|
|
|
err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req,
|
|
GFP_ATOMIC);
|
|
if (err < 0) {
|
|
if (err == -ENOSPC) {
|
|
chan->ring_bufs_avail = 0;
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
err = wait_event_killable(*chan->vc_wq,
|
|
chan->ring_bufs_avail);
|
|
if (err == -ERESTARTSYS)
|
|
return err;
|
|
|
|
p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");
|
|
goto req_retry;
|
|
} else {
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
p9_debug(P9_DEBUG_TRANS,
|
|
"virtio rpc add_sgs returned failure\n");
|
|
return -EIO;
|
|
}
|
|
}
|
|
virtqueue_kick(chan->vq);
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
|
|
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
|
|
return 0;
|
|
}
|
|
|
|
static int p9_get_mapped_pages(struct virtio_chan *chan,
|
|
struct page ***pages,
|
|
struct iov_iter *data,
|
|
int count,
|
|
size_t *offs,
|
|
int *need_drop)
|
|
{
|
|
int nr_pages;
|
|
int err;
|
|
|
|
if (!iov_iter_count(data))
|
|
return 0;
|
|
|
|
if (!iov_iter_is_kvec(data)) {
|
|
int n;
|
|
/*
|
|
* We allow only p9_max_pages pinned. We wait for the
|
|
* Other zc request to finish here
|
|
*/
|
|
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
|
|
err = wait_event_killable(vp_wq,
|
|
(atomic_read(&vp_pinned) < chan->p9_max_pages));
|
|
if (err == -ERESTARTSYS)
|
|
return err;
|
|
}
|
|
n = iov_iter_get_pages_alloc(data, pages, count, offs);
|
|
if (n < 0)
|
|
return n;
|
|
*need_drop = 1;
|
|
nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
|
|
atomic_add(nr_pages, &vp_pinned);
|
|
return n;
|
|
} else {
|
|
/* kernel buffer, no need to pin pages */
|
|
int index;
|
|
size_t len;
|
|
void *p;
|
|
|
|
/* we'd already checked that it's non-empty */
|
|
while (1) {
|
|
len = iov_iter_single_seg_count(data);
|
|
if (likely(len)) {
|
|
p = data->kvec->iov_base + data->iov_offset;
|
|
break;
|
|
}
|
|
iov_iter_advance(data, 0);
|
|
}
|
|
if (len > count)
|
|
len = count;
|
|
|
|
nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
|
|
(unsigned long)p / PAGE_SIZE;
|
|
|
|
*pages = kmalloc_array(nr_pages, sizeof(struct page *),
|
|
GFP_NOFS);
|
|
if (!*pages)
|
|
return -ENOMEM;
|
|
|
|
*need_drop = 0;
|
|
p -= (*offs = offset_in_page(p));
|
|
for (index = 0; index < nr_pages; index++) {
|
|
if (is_vmalloc_addr(p))
|
|
(*pages)[index] = vmalloc_to_page(p);
|
|
else
|
|
(*pages)[index] = kmap_to_page(p);
|
|
p += PAGE_SIZE;
|
|
}
|
|
return len;
|
|
}
|
|
}
|
|
|
|
static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
|
|
size_t offs, struct page **pages)
|
|
{
|
|
unsigned size, n;
|
|
void *to = req->rc.sdata + in_hdr_len;
|
|
|
|
// Fits entirely into the static data? Nothing to do.
|
|
if (req->rc.size < in_hdr_len)
|
|
return;
|
|
|
|
// Really long error message? Tough, truncate the reply. Might get
|
|
// rejected (we can't be arsed to adjust the size encoded in header,
|
|
// or string size for that matter), but it wouldn't be anything valid
|
|
// anyway.
|
|
if (unlikely(req->rc.size > P9_ZC_HDR_SZ))
|
|
req->rc.size = P9_ZC_HDR_SZ;
|
|
|
|
// data won't span more than two pages
|
|
size = req->rc.size - in_hdr_len;
|
|
n = PAGE_SIZE - offs;
|
|
if (size > n) {
|
|
memcpy_from_page(to, *pages++, offs, n);
|
|
offs = 0;
|
|
to += n;
|
|
size -= n;
|
|
}
|
|
memcpy_from_page(to, *pages, offs, size);
|
|
}
|
|
|
|
/**
|
|
* p9_virtio_zc_request - issue a zero copy request
|
|
* @client: client instance issuing the request
|
|
* @req: request to be issued
|
|
* @uidata: user buffer that should be used for zero copy read
|
|
* @uodata: user buffer that should be used for zero copy write
|
|
* @inlen: read buffer size
|
|
* @outlen: write buffer size
|
|
* @in_hdr_len: reader header size, This is the size of response protocol data
|
|
*
|
|
*/
|
|
static int
|
|
p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
|
|
struct iov_iter *uidata, struct iov_iter *uodata,
|
|
int inlen, int outlen, int in_hdr_len)
|
|
{
|
|
int in, out, err, out_sgs, in_sgs;
|
|
unsigned long flags;
|
|
int in_nr_pages = 0, out_nr_pages = 0;
|
|
struct page **in_pages = NULL, **out_pages = NULL;
|
|
struct virtio_chan *chan = client->trans;
|
|
struct scatterlist *sgs[4];
|
|
size_t offs;
|
|
int need_drop = 0;
|
|
int kicked = 0;
|
|
|
|
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
|
|
|
|
if (uodata) {
|
|
__le32 sz;
|
|
int n = p9_get_mapped_pages(chan, &out_pages, uodata,
|
|
outlen, &offs, &need_drop);
|
|
if (n < 0) {
|
|
err = n;
|
|
goto err_out;
|
|
}
|
|
out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
|
|
if (n != outlen) {
|
|
__le32 v = cpu_to_le32(n);
|
|
memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
|
|
outlen = n;
|
|
}
|
|
/* The size field of the message must include the length of the
|
|
* header and the length of the data. We didn't actually know
|
|
* the length of the data until this point so add it in now.
|
|
*/
|
|
sz = cpu_to_le32(req->tc.size + outlen);
|
|
memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
|
|
} else if (uidata) {
|
|
int n = p9_get_mapped_pages(chan, &in_pages, uidata,
|
|
inlen, &offs, &need_drop);
|
|
if (n < 0) {
|
|
err = n;
|
|
goto err_out;
|
|
}
|
|
in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
|
|
if (n != inlen) {
|
|
__le32 v = cpu_to_le32(n);
|
|
memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
|
|
inlen = n;
|
|
}
|
|
}
|
|
req->status = REQ_STATUS_SENT;
|
|
req_retry_pinned:
|
|
spin_lock_irqsave(&chan->lock, flags);
|
|
|
|
out_sgs = in_sgs = 0;
|
|
|
|
/* out data */
|
|
out = pack_sg_list(chan->sg, 0,
|
|
VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
|
|
|
|
if (out)
|
|
sgs[out_sgs++] = chan->sg;
|
|
|
|
if (out_pages) {
|
|
sgs[out_sgs++] = chan->sg + out;
|
|
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
|
|
out_pages, out_nr_pages, offs, outlen);
|
|
}
|
|
|
|
/*
|
|
* Take care of in data
|
|
* For example TREAD have 11.
|
|
* 11 is the read/write header = PDU Header(7) + IO Size (4).
|
|
* Arrange in such a way that server places header in the
|
|
* allocated memory and payload onto the user buffer.
|
|
*/
|
|
in = pack_sg_list(chan->sg, out,
|
|
VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
|
|
if (in)
|
|
sgs[out_sgs + in_sgs++] = chan->sg + out;
|
|
|
|
if (in_pages) {
|
|
sgs[out_sgs + in_sgs++] = chan->sg + out + in;
|
|
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
|
|
in_pages, in_nr_pages, offs, inlen);
|
|
}
|
|
|
|
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
|
|
err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req,
|
|
GFP_ATOMIC);
|
|
if (err < 0) {
|
|
if (err == -ENOSPC) {
|
|
chan->ring_bufs_avail = 0;
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
err = wait_event_killable(*chan->vc_wq,
|
|
chan->ring_bufs_avail);
|
|
if (err == -ERESTARTSYS)
|
|
goto err_out;
|
|
|
|
p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");
|
|
goto req_retry_pinned;
|
|
} else {
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
p9_debug(P9_DEBUG_TRANS,
|
|
"virtio rpc add_sgs returned failure\n");
|
|
err = -EIO;
|
|
goto err_out;
|
|
}
|
|
}
|
|
virtqueue_kick(chan->vq);
|
|
spin_unlock_irqrestore(&chan->lock, flags);
|
|
kicked = 1;
|
|
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
|
|
err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
|
|
// RERROR needs reply (== error string) in static data
|
|
if (req->status == REQ_STATUS_RCVD &&
|
|
unlikely(req->rc.sdata[4] == P9_RERROR))
|
|
handle_rerror(req, in_hdr_len, offs, in_pages);
|
|
|
|
/*
|
|
* Non kernel buffers are pinned, unpin them
|
|
*/
|
|
err_out:
|
|
if (need_drop) {
|
|
if (in_pages) {
|
|
p9_release_pages(in_pages, in_nr_pages);
|
|
atomic_sub(in_nr_pages, &vp_pinned);
|
|
}
|
|
if (out_pages) {
|
|
p9_release_pages(out_pages, out_nr_pages);
|
|
atomic_sub(out_nr_pages, &vp_pinned);
|
|
}
|
|
/* wakeup anybody waiting for slots to pin pages */
|
|
wake_up(&vp_wq);
|
|
}
|
|
kvfree(in_pages);
|
|
kvfree(out_pages);
|
|
if (!kicked) {
|
|
/* reply won't come */
|
|
p9_req_put(req);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
static ssize_t p9_mount_tag_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct virtio_chan *chan;
|
|
struct virtio_device *vdev;
|
|
int tag_len;
|
|
|
|
vdev = dev_to_virtio(dev);
|
|
chan = vdev->priv;
|
|
tag_len = strlen(chan->tag);
|
|
|
|
memcpy(buf, chan->tag, tag_len + 1);
|
|
|
|
return tag_len + 1;
|
|
}
|
|
|
|
static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
|
|
|
|
/**
 * p9_virtio_probe - set up a channel for a newly found 9P virtio device
 * @vdev: virtio device to probe
 *
 * Allocates the channel, finds its single request virtqueue, reads the
 * mount tag from the device configuration, exposes it through the
 * "mount_tag" sysfs attribute and adds the channel to virtio_chan_list.
 *
 * Return: 0 on success; -EINVAL if config access is unavailable or the
 * device does not offer VIRTIO_9P_MOUNT_TAG, -ENOMEM on allocation
 * failure, or the error reported while finding the virtqueue or creating
 * the sysfs file. On failure everything set up so far is undone.
 *
 */

static int p9_virtio_probe(struct virtio_device *vdev)
{
	__u16 tag_len;
	char *tag;
	int err;
	struct virtio_chan *chan;

	/* The mount tag is read from config space, so we need ->get. */
	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
	if (!chan) {
		pr_err("Failed to allocate virtio 9P channel\n");
		err = -ENOMEM;
		goto fail;
	}

	chan->vdev = vdev;

	/* We expect one virtqueue, for requests. */
	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
	if (IS_ERR(chan->vq)) {
		err = PTR_ERR(chan->vq);
		goto out_free_chan;
	}
	/* req_done() finds the channel through the device's ->priv. */
	chan->vq->vdev->priv = chan;
	spin_lock_init(&chan->lock);

	sg_init_table(chan->sg, VIRTQUEUE_NUM);

	chan->inuse = false;
	if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
		virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len);
	} else {
		/* Without a mount tag the channel cannot be looked up. */
		err = -EINVAL;
		goto out_free_vq;
	}
	/* One extra zeroed byte keeps the tag NUL-terminated. */
	tag = kzalloc(tag_len + 1, GFP_KERNEL);
	if (!tag) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag),
			   tag, tag_len);
	chan->tag = tag;
	err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
	if (err) {
		goto out_free_tag;
	}
	chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
	if (!chan->vc_wq) {
		err = -ENOMEM;
		goto out_remove_file;
	}
	init_waitqueue_head(chan->vc_wq);
	chan->ring_bufs_avail = 1;
	/* Ceiling limit to avoid denial of service attacks */
	chan->p9_max_pages = nr_free_buffer_pages()/4;

	virtio_device_ready(vdev);

	/* Publish the channel so that p9_virtio_create() can find it. */
	mutex_lock(&virtio_9p_lock);
	list_add_tail(&chan->chan_list, &virtio_chan_list);
	mutex_unlock(&virtio_9p_lock);

	/* Let udev rules use the new mount_tag attribute. */
	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);

	return 0;

	/* Error unwinding, in reverse order of setup. */
out_remove_file:
	sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr);
out_free_tag:
	kfree(tag);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_chan:
	kfree(chan);
fail:
	return err;
}
|
|
|
|
|
|
/**
|
|
* p9_virtio_create - allocate a new virtio channel
|
|
* @client: client instance invoking this transport
|
|
* @devname: string identifying the channel to connect to (unused)
|
|
* @args: args passed from sys_mount() for per-transport options (unused)
|
|
*
|
|
* This sets up a transport channel for 9p communication. Right now
|
|
* we only match the first available channel, but eventually we could look up
|
|
* alternate channels by matching devname versus a virtio_config entry.
|
|
* We use a simple reference count mechanism to ensure that only a single
|
|
* mount has a channel open at a time.
|
|
*
|
|
*/
|
|
|
|
static int
|
|
p9_virtio_create(struct p9_client *client, const char *devname, char *args)
|
|
{
|
|
struct virtio_chan *chan;
|
|
int ret = -ENOENT;
|
|
int found = 0;
|
|
|
|
if (devname == NULL)
|
|
return -EINVAL;
|
|
|
|
mutex_lock(&virtio_9p_lock);
|
|
list_for_each_entry(chan, &virtio_chan_list, chan_list) {
|
|
if (!strcmp(devname, chan->tag)) {
|
|
if (!chan->inuse) {
|
|
chan->inuse = true;
|
|
found = 1;
|
|
break;
|
|
}
|
|
ret = -EBUSY;
|
|
}
|
|
}
|
|
mutex_unlock(&virtio_9p_lock);
|
|
|
|
if (!found) {
|
|
pr_err("no channels available for device %s\n", devname);
|
|
return ret;
|
|
}
|
|
|
|
client->trans = (void *)chan;
|
|
client->status = Connected;
|
|
chan->client = client;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* p9_virtio_remove - clean up resources associated with a virtio device
|
|
* @vdev: virtio device to remove
|
|
*
|
|
*/
|
|
|
|
static void p9_virtio_remove(struct virtio_device *vdev)
|
|
{
|
|
struct virtio_chan *chan = vdev->priv;
|
|
unsigned long warning_time;
|
|
|
|
mutex_lock(&virtio_9p_lock);
|
|
|
|
/* Remove self from list so we don't get new users. */
|
|
list_del(&chan->chan_list);
|
|
warning_time = jiffies;
|
|
|
|
/* Wait for existing users to close. */
|
|
while (chan->inuse) {
|
|
mutex_unlock(&virtio_9p_lock);
|
|
msleep(250);
|
|
if (time_after(jiffies, warning_time + 10 * HZ)) {
|
|
dev_emerg(&vdev->dev,
|
|
"p9_virtio_remove: waiting for device in use.\n");
|
|
warning_time = jiffies;
|
|
}
|
|
mutex_lock(&virtio_9p_lock);
|
|
}
|
|
|
|
mutex_unlock(&virtio_9p_lock);
|
|
|
|
virtio_reset_device(vdev);
|
|
vdev->config->del_vqs(vdev);
|
|
|
|
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
|
|
kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
|
|
kfree(chan->tag);
|
|
kfree(chan->vc_wq);
|
|
kfree(chan);
|
|
|
|
}
|
|
|
|
/* Devices this driver binds to: any 9P virtio device; { 0 } ends the table. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
|
|
|
|
/* Feature bits this driver understands; probe requires the mount tag. */
static unsigned int features[] = {
	VIRTIO_9P_MOUNT_TAG,
};
|
|
|
|
/* The virtio driver description registered in p9_virtio_init(). */
static struct virtio_driver p9_virtio_drv = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.probe = p9_virtio_probe,
	.remove = p9_virtio_remove,
};
|
|
|
|
/* The 9P transport description ("virtio") registered in p9_virtio_init(). */
static struct p9_trans_module p9_virtio_trans = {
	.name = "virtio",
	.create = p9_virtio_create,
	.close = p9_virtio_close,
	.request = p9_virtio_request,
	.zc_request = p9_virtio_zc_request,
	.cancel = p9_virtio_cancel,
	.cancelled = p9_virtio_cancelled,
	/*
	 * We leave one entry for input and one entry for response
	 * headers. We also skip one more entry to accommodate addresses
	 * that are not at a page boundary, which can result in an extra
	 * page in zero copy.
	 */
	.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
	.def = 1,
	.owner = THIS_MODULE,
};
|
|
|
|
/* The standard init function */
|
|
static int __init p9_virtio_init(void)
|
|
{
|
|
int rc;
|
|
|
|
INIT_LIST_HEAD(&virtio_chan_list);
|
|
|
|
v9fs_register_trans(&p9_virtio_trans);
|
|
rc = register_virtio_driver(&p9_virtio_drv);
|
|
if (rc)
|
|
v9fs_unregister_trans(&p9_virtio_trans);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
 * Module exit: undo p9_virtio_init() in reverse order - the virtio driver
 * goes first, then the 9P transport.
 */
static void __exit p9_virtio_cleanup(void)
{
	unregister_virtio_driver(&p9_virtio_drv);

	v9fs_unregister_trans(&p9_virtio_trans);
}
|
|
|
|
/* Module entry/exit points and the 9P transport alias for "virtio". */
module_init(p9_virtio_init);
module_exit(p9_virtio_cleanup);
MODULE_ALIAS_9P("virtio");

/* Export the device id table as module device-table information. */
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Virtio 9p Transport");
MODULE_LICENSE("GPL");
|