Some RDMA work and some good bugfixes, and two new features that could
benefit from user testing: Anna Schumacker contributed a simple NFSv4.2 COPY implementation. COPY is already supported on the client side, so a call to copy_file_range() on a recent client should now result in a server-side copy that doesn't require all the data to make a round trip to the client and back. Jeff Layton implemented callbacks to notify clients when contended locks become available, which should reduce latency on workloads with contended locks. -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJX/mcsAAoJECebzXlCjuG+MU0P/3SzTLGYXU5yOTAorx255/uf fUVKQQhTzzaA2xj3gWWWztYx3y0ZJUVgwU56a+Ap5Z8/goqDQ78H+ePEc+MG7BT/ /UXS/bITvt0MP/dvPrDzhSltvqx/wpelLPBo29hGLlAQ2dsnD4Y75IbOOQccWqcC iD2v6x7lnpWZ7j9Zhwzg/JNQHwISIb7tiLoYBjfcdNDEMU76KIyhxD0Cx9MSeBzH 9Rq/oEdwGDFS5WqVfNe2jxbngoauq1IupziQ2eQGv2D/POyXCx8fphoYjDz1XaW8 PxaJtJtM2owPGG+z2CxklJqNaS1Z4F+oppjg+nf4i/ibxmIBaTy8NluASX3vMh69 CDO1+ly+TiF0l1VqMOQJWRnqn1qGk6fLpF6P1Ac62B0oWpeLGU7nmik7XN1ORgsi 8ksxRKNAWeprZo3wl5xNrADu/wlZ7XCJTc4QoHEgYT04aHF+j8EMCHv+mtZ8+Bwn WWiA8iItZOgXV4vitCRJlvsixjYvmF3djPIoI2Lt5KDWIg+eL89sKwzTALSfeC4m Vjb0svzPX1MmZCNP1rCStFbl3gZYXZyqPk+uA6M7H8mjAjVeKxRPowWpMBgvYZHr FjCPb878bAuqCeBVbIyOLLcKWBLTw8PsUWZAor3gNg454JGkMjLUyJ/S22Cz5Nbo HdjoiTJtbPrHnCwTMXwa =nozl -----END PGP SIGNATURE----- Merge tag 'nfsd-4.9' of git://linux-nfs.org/~bfields/linux Pull nfsd updates from Bruce Fields: "Some RDMA work and some good bugfixes, and two new features that could benefit from user testing: - Anna Schumacker contributed a simple NFSv4.2 COPY implementation. COPY is already supported on the client side, so a call to copy_file_range() on a recent client should now result in a server-side copy that doesn't require all the data to make a round trip to the client and back. 
- Jeff Layton implemented callbacks to notify clients when contended locks become available, which should reduce latency on workloads with contended locks" * tag 'nfsd-4.9' of git://linux-nfs.org/~bfields/linux: NFSD: Implement the COPY call nfsd: handle EUCLEAN nfsd: only WARN once on unmapped errors exportfs: be careful to only return expected errors. nfsd4: setclientid_confirm with unmatched verifier should fail nfsd: randomize SETCLIENTID reply to help distinguish servers nfsd: set the MAY_NOTIFY_LOCK flag in OPEN replies nfs: add a new NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK constant nfsd: add a LRU list for blocked locks nfsd: have nfsd4_lock use blocking locks for v4.1+ locks nfsd: plumb in a CB_NOTIFY_LOCK operation NFSD: fix corruption in notifier registration svcrdma: support Remote Invalidation svcrdma: Server-side support for rpcrdma_connect_private rpcrdma: RDMA/CM private message data structure svcrdma: Skip put_page() when send_reply() fails svcrdma: Tail iovec leaves an orphaned DMA mapping nfsd: fix dprintk in nfsd4_encode_getdeviceinfo nfsd: eliminate cb_minorversion field nfsd: don't set a FL_LAYOUT lease for flexfiles layouts
This commit is contained in:
commit
2778556474
@ -428,10 +428,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
|
||||
if (!nop || !nop->fh_to_dentry)
|
||||
return ERR_PTR(-ESTALE);
|
||||
result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
|
||||
if (!result)
|
||||
result = ERR_PTR(-ESTALE);
|
||||
if (IS_ERR(result))
|
||||
return result;
|
||||
if (PTR_ERR(result) == -ENOMEM)
|
||||
return ERR_CAST(result);
|
||||
if (IS_ERR_OR_NULL(result))
|
||||
return ERR_PTR(-ESTALE);
|
||||
|
||||
if (d_is_dir(result)) {
|
||||
/*
|
||||
@ -541,6 +541,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
|
||||
|
||||
err_result:
|
||||
dput(result);
|
||||
if (err != -ENOMEM)
|
||||
err = -ESTALE;
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(exportfs_decode_fh);
|
||||
|
@ -126,6 +126,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
|
||||
const struct nfsd4_layout_ops ff_layout_ops = {
|
||||
.notify_types =
|
||||
NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
|
||||
.disable_recalls = true,
|
||||
.proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo,
|
||||
.encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo,
|
||||
.proc_layoutget = nfsd4_ff_proc_layoutget,
|
||||
|
@ -84,6 +84,7 @@ struct nfsd_net {
|
||||
struct list_head client_lru;
|
||||
struct list_head close_lru;
|
||||
struct list_head del_recall_lru;
|
||||
struct list_head blocked_locks_lru;
|
||||
|
||||
struct delayed_work laundromat_work;
|
||||
|
||||
|
@ -448,7 +448,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
|
||||
{
|
||||
int status;
|
||||
|
||||
if (cb->cb_minorversion == 0)
|
||||
if (cb->cb_clp->cl_minorversion == 0)
|
||||
return 0;
|
||||
|
||||
status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status);
|
||||
@ -485,7 +485,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
|
||||
const struct nfs4_delegation *dp = cb_to_delegation(cb);
|
||||
struct nfs4_cb_compound_hdr hdr = {
|
||||
.ident = cb->cb_clp->cl_cb_ident,
|
||||
.minorversion = cb->cb_minorversion,
|
||||
.minorversion = cb->cb_clp->cl_minorversion,
|
||||
};
|
||||
|
||||
encode_cb_compound4args(xdr, &hdr);
|
||||
@ -594,7 +594,7 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
|
||||
container_of(cb, struct nfs4_layout_stateid, ls_recall);
|
||||
struct nfs4_cb_compound_hdr hdr = {
|
||||
.ident = 0,
|
||||
.minorversion = cb->cb_minorversion,
|
||||
.minorversion = cb->cb_clp->cl_minorversion,
|
||||
};
|
||||
|
||||
encode_cb_compound4args(xdr, &hdr);
|
||||
@ -623,6 +623,62 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
|
||||
}
|
||||
#endif /* CONFIG_NFSD_PNFS */
|
||||
|
||||
static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len);
|
||||
p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8);
|
||||
xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len);
|
||||
}
|
||||
|
||||
static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
|
||||
struct xdr_stream *xdr,
|
||||
const struct nfsd4_callback *cb)
|
||||
{
|
||||
const struct nfsd4_blocked_lock *nbl =
|
||||
container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
|
||||
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
|
||||
struct nfs4_cb_compound_hdr hdr = {
|
||||
.ident = 0,
|
||||
.minorversion = cb->cb_clp->cl_minorversion,
|
||||
};
|
||||
|
||||
__be32 *p;
|
||||
|
||||
BUG_ON(hdr.minorversion == 0);
|
||||
|
||||
encode_cb_compound4args(xdr, &hdr);
|
||||
encode_cb_sequence4args(xdr, cb, &hdr);
|
||||
|
||||
p = xdr_reserve_space(xdr, 4);
|
||||
*p = cpu_to_be32(OP_CB_NOTIFY_LOCK);
|
||||
encode_nfs_fh4(xdr, &nbl->nbl_fh);
|
||||
encode_stateowner(xdr, &lo->lo_owner);
|
||||
hdr.nops++;
|
||||
|
||||
encode_cb_nops(&hdr);
|
||||
}
|
||||
|
||||
static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
|
||||
struct xdr_stream *xdr,
|
||||
struct nfsd4_callback *cb)
|
||||
{
|
||||
struct nfs4_cb_compound_hdr hdr;
|
||||
int status;
|
||||
|
||||
status = decode_cb_compound4res(xdr, &hdr);
|
||||
if (unlikely(status))
|
||||
return status;
|
||||
|
||||
if (cb) {
|
||||
status = decode_cb_sequence4res(xdr, cb);
|
||||
if (unlikely(status || cb->cb_seq_status))
|
||||
return status;
|
||||
}
|
||||
return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
|
||||
}
|
||||
|
||||
/*
|
||||
* RPC procedure tables
|
||||
*/
|
||||
@ -643,6 +699,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
|
||||
#ifdef CONFIG_NFSD_PNFS
|
||||
PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout),
|
||||
#endif
|
||||
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
|
||||
};
|
||||
|
||||
static struct rpc_version nfs_cb_version4 = {
|
||||
@ -862,7 +919,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
|
||||
struct nfs4_client *clp = cb->cb_clp;
|
||||
u32 minorversion = clp->cl_minorversion;
|
||||
|
||||
cb->cb_minorversion = minorversion;
|
||||
/*
|
||||
* cb_seq_status is only set in decode_cb_sequence4res,
|
||||
* and so will remain 1 if an rpc level failure occurs.
|
||||
|
@ -174,7 +174,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
|
||||
list_del_init(&ls->ls_perfile);
|
||||
spin_unlock(&fp->fi_lock);
|
||||
|
||||
vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
|
||||
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
|
||||
vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
|
||||
fput(ls->ls_file);
|
||||
|
||||
if (ls->ls_recalled)
|
||||
@ -189,6 +190,9 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
|
||||
struct file_lock *fl;
|
||||
int status;
|
||||
|
||||
if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
|
||||
return 0;
|
||||
|
||||
fl = locks_alloc_lock();
|
||||
if (!fl)
|
||||
return -ENOMEM;
|
||||
|
@ -1010,46 +1010,96 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
struct nfsd4_clone *clone)
|
||||
nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
stateid_t *src_stateid, struct file **src,
|
||||
stateid_t *dst_stateid, struct file **dst)
|
||||
{
|
||||
struct file *src, *dst;
|
||||
__be32 status;
|
||||
|
||||
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
|
||||
&clone->cl_src_stateid, RD_STATE,
|
||||
&src, NULL);
|
||||
src_stateid, RD_STATE, src, NULL);
|
||||
if (status) {
|
||||
dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
|
||||
&clone->cl_dst_stateid, WR_STATE,
|
||||
&dst, NULL);
|
||||
dst_stateid, WR_STATE, dst, NULL);
|
||||
if (status) {
|
||||
dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
|
||||
goto out_put_src;
|
||||
}
|
||||
|
||||
/* fix up for NFS-specific error code */
|
||||
if (!S_ISREG(file_inode(src)->i_mode) ||
|
||||
!S_ISREG(file_inode(dst)->i_mode)) {
|
||||
if (!S_ISREG(file_inode(*src)->i_mode) ||
|
||||
!S_ISREG(file_inode(*dst)->i_mode)) {
|
||||
status = nfserr_wrong_type;
|
||||
goto out_put_dst;
|
||||
}
|
||||
|
||||
out:
|
||||
return status;
|
||||
out_put_dst:
|
||||
fput(*dst);
|
||||
out_put_src:
|
||||
fput(*src);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
struct nfsd4_clone *clone)
|
||||
{
|
||||
struct file *src, *dst;
|
||||
__be32 status;
|
||||
|
||||
status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
|
||||
&clone->cl_dst_stateid, &dst);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
status = nfsd4_clone_file_range(src, clone->cl_src_pos,
|
||||
dst, clone->cl_dst_pos, clone->cl_count);
|
||||
|
||||
out_put_dst:
|
||||
fput(dst);
|
||||
out_put_src:
|
||||
fput(src);
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
struct nfsd4_copy *copy)
|
||||
{
|
||||
struct file *src, *dst;
|
||||
__be32 status;
|
||||
ssize_t bytes;
|
||||
|
||||
status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, &src,
|
||||
©->cp_dst_stateid, &dst);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
bytes = nfsd_copy_file_range(src, copy->cp_src_pos,
|
||||
dst, copy->cp_dst_pos, copy->cp_count);
|
||||
|
||||
if (bytes < 0)
|
||||
status = nfserrno(bytes);
|
||||
else {
|
||||
copy->cp_res.wr_bytes_written = bytes;
|
||||
copy->cp_res.wr_stable_how = NFS_UNSTABLE;
|
||||
copy->cp_consecutive = 1;
|
||||
copy->cp_synchronous = 1;
|
||||
gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp));
|
||||
status = nfs_ok;
|
||||
}
|
||||
|
||||
fput(src);
|
||||
fput(dst);
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
struct nfsd4_fallocate *fallocate, int flags)
|
||||
@ -1966,6 +2016,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
|
||||
op_encode_channel_attrs_maxsz) * sizeof(__be32);
|
||||
}
|
||||
|
||||
static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
|
||||
{
|
||||
return (op_encode_hdr_size +
|
||||
1 /* wr_callback */ +
|
||||
op_encode_stateid_maxsz /* wr_callback */ +
|
||||
2 /* wr_count */ +
|
||||
1 /* wr_committed */ +
|
||||
op_encode_verifier_maxsz +
|
||||
1 /* cr_consecutive */ +
|
||||
1 /* cr_synchronous */) * sizeof(__be32);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NFSD_PNFS
|
||||
/*
|
||||
* At this stage we don't really know what layout driver will handle the request,
|
||||
@ -2328,6 +2390,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
|
||||
.op_name = "OP_CLONE",
|
||||
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
|
||||
},
|
||||
[OP_COPY] = {
|
||||
.op_func = (nfsd4op_func)nfsd4_copy,
|
||||
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
|
||||
.op_name = "OP_COPY",
|
||||
.op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
|
||||
},
|
||||
[OP_SEEK] = {
|
||||
.op_func = (nfsd4op_func)nfsd4_seek,
|
||||
.op_name = "OP_SEEK",
|
||||
|
@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
|
||||
static void free_session(struct nfsd4_session *);
|
||||
|
||||
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
|
||||
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
|
||||
|
||||
static bool is_session_dead(struct nfsd4_session *ses)
|
||||
{
|
||||
@ -210,6 +211,85 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
|
||||
spin_unlock(&nn->client_lock);
|
||||
}
|
||||
|
||||
static struct nfsd4_blocked_lock *
|
||||
find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
|
||||
struct nfsd_net *nn)
|
||||
{
|
||||
struct nfsd4_blocked_lock *cur, *found = NULL;
|
||||
|
||||
spin_lock(&nn->client_lock);
|
||||
list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
|
||||
if (fh_match(fh, &cur->nbl_fh)) {
|
||||
list_del_init(&cur->nbl_list);
|
||||
list_del_init(&cur->nbl_lru);
|
||||
found = cur;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&nn->client_lock);
|
||||
if (found)
|
||||
posix_unblock_lock(&found->nbl_lock);
|
||||
return found;
|
||||
}
|
||||
|
||||
static struct nfsd4_blocked_lock *
|
||||
find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
|
||||
struct nfsd_net *nn)
|
||||
{
|
||||
struct nfsd4_blocked_lock *nbl;
|
||||
|
||||
nbl = find_blocked_lock(lo, fh, nn);
|
||||
if (!nbl) {
|
||||
nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
|
||||
if (nbl) {
|
||||
fh_copy_shallow(&nbl->nbl_fh, fh);
|
||||
locks_init_lock(&nbl->nbl_lock);
|
||||
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
|
||||
&nfsd4_cb_notify_lock_ops,
|
||||
NFSPROC4_CLNT_CB_NOTIFY_LOCK);
|
||||
}
|
||||
}
|
||||
return nbl;
|
||||
}
|
||||
|
||||
static void
|
||||
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
|
||||
{
|
||||
locks_release_private(&nbl->nbl_lock);
|
||||
kfree(nbl);
|
||||
}
|
||||
|
||||
static int
|
||||
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
|
||||
{
|
||||
/*
|
||||
* Since this is just an optimization, we don't try very hard if it
|
||||
* turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
|
||||
* just quit trying on anything else.
|
||||
*/
|
||||
switch (task->tk_status) {
|
||||
case -NFS4ERR_DELAY:
|
||||
rpc_delay(task, 1 * HZ);
|
||||
return 0;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
|
||||
{
|
||||
struct nfsd4_blocked_lock *nbl = container_of(cb,
|
||||
struct nfsd4_blocked_lock, nbl_cb);
|
||||
|
||||
free_blocked_lock(nbl);
|
||||
}
|
||||
|
||||
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
|
||||
.done = nfsd4_cb_notify_lock_done,
|
||||
.release = nfsd4_cb_notify_lock_release,
|
||||
};
|
||||
|
||||
static inline struct nfs4_stateowner *
|
||||
nfs4_get_stateowner(struct nfs4_stateowner *sop)
|
||||
{
|
||||
@ -3224,9 +3304,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
|
||||
goto out;
|
||||
/* cases below refer to rfc 3530 section 14.2.34: */
|
||||
if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
|
||||
if (conf && !unconf) /* case 2: probable retransmit */
|
||||
if (conf && same_verf(&confirm, &conf->cl_confirm)) {
|
||||
/* case 2: probable retransmit */
|
||||
status = nfs_ok;
|
||||
else /* case 4: client hasn't noticed we rebooted yet? */
|
||||
} else /* case 4: client hasn't noticed we rebooted yet? */
|
||||
status = nfserr_stale_clientid;
|
||||
goto out;
|
||||
}
|
||||
@ -4410,9 +4491,11 @@ out:
|
||||
* To finish the open response, we just need to set the rflags.
|
||||
*/
|
||||
open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
|
||||
if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
|
||||
!nfsd4_has_session(&resp->cstate))
|
||||
if (nfsd4_has_session(&resp->cstate))
|
||||
open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
|
||||
else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
|
||||
open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
|
||||
|
||||
if (dp)
|
||||
nfs4_put_stid(&dp->dl_stid);
|
||||
if (stp)
|
||||
@ -4501,6 +4584,7 @@ nfs4_laundromat(struct nfsd_net *nn)
|
||||
struct nfs4_openowner *oo;
|
||||
struct nfs4_delegation *dp;
|
||||
struct nfs4_ol_stateid *stp;
|
||||
struct nfsd4_blocked_lock *nbl;
|
||||
struct list_head *pos, *next, reaplist;
|
||||
time_t cutoff = get_seconds() - nn->nfsd4_lease;
|
||||
time_t t, new_timeo = nn->nfsd4_lease;
|
||||
@ -4569,6 +4653,41 @@ nfs4_laundromat(struct nfsd_net *nn)
|
||||
}
|
||||
spin_unlock(&nn->client_lock);
|
||||
|
||||
/*
|
||||
* It's possible for a client to try and acquire an already held lock
|
||||
* that is being held for a long time, and then lose interest in it.
|
||||
* So, we clean out any un-revisited request after a lease period
|
||||
* under the assumption that the client is no longer interested.
|
||||
*
|
||||
* RFC5661, sec. 9.6 states that the client must not rely on getting
|
||||
* notifications and must continue to poll for locks, even when the
|
||||
* server supports them. Thus this shouldn't lead to clients blocking
|
||||
* indefinitely once the lock does become free.
|
||||
*/
|
||||
BUG_ON(!list_empty(&reaplist));
|
||||
spin_lock(&nn->client_lock);
|
||||
while (!list_empty(&nn->blocked_locks_lru)) {
|
||||
nbl = list_first_entry(&nn->blocked_locks_lru,
|
||||
struct nfsd4_blocked_lock, nbl_lru);
|
||||
if (time_after((unsigned long)nbl->nbl_time,
|
||||
(unsigned long)cutoff)) {
|
||||
t = nbl->nbl_time - cutoff;
|
||||
new_timeo = min(new_timeo, t);
|
||||
break;
|
||||
}
|
||||
list_move(&nbl->nbl_lru, &reaplist);
|
||||
list_del_init(&nbl->nbl_list);
|
||||
}
|
||||
spin_unlock(&nn->client_lock);
|
||||
|
||||
/*
 * Dispose of the expired blocked locks collected on reaplist above.
 *
 * Entries must be taken from reaplist itself. Taking them from
 * nn->blocked_locks_lru, as before, would fetch whatever is at the head of
 * the live LRU; if that list is empty, list_first_entry() yields a bogus
 * pointer computed from the list head, which would then be unblocked and
 * freed, while reaplist would never drain.
 */
while (!list_empty(&reaplist)) {
	nbl = list_first_entry(&reaplist,
				struct nfsd4_blocked_lock, nbl_lru);
	list_del_init(&nbl->nbl_lru);
	posix_unblock_lock(&nbl->nbl_lock);
	free_blocked_lock(nbl);
}
|
||||
|
||||
new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
|
||||
return new_timeo;
|
||||
}
|
||||
@ -5309,7 +5428,31 @@ nfsd4_fl_put_owner(fl_owner_t owner)
|
||||
nfs4_put_stateowner(&lo->lo_owner);
|
||||
}
|
||||
|
||||
static void
|
||||
nfsd4_lm_notify(struct file_lock *fl)
|
||||
{
|
||||
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
|
||||
struct net *net = lo->lo_owner.so_client->net;
|
||||
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
|
||||
struct nfsd4_blocked_lock *nbl = container_of(fl,
|
||||
struct nfsd4_blocked_lock, nbl_lock);
|
||||
bool queue = false;
|
||||
|
||||
/* An empty list means that something else is going to be using it */
|
||||
spin_lock(&nn->client_lock);
|
||||
if (!list_empty(&nbl->nbl_list)) {
|
||||
list_del_init(&nbl->nbl_list);
|
||||
list_del_init(&nbl->nbl_lru);
|
||||
queue = true;
|
||||
}
|
||||
spin_unlock(&nn->client_lock);
|
||||
|
||||
if (queue)
|
||||
nfsd4_run_cb(&nbl->nbl_cb);
|
||||
}
|
||||
|
||||
static const struct lock_manager_operations nfsd_posix_mng_ops = {
|
||||
.lm_notify = nfsd4_lm_notify,
|
||||
.lm_get_owner = nfsd4_fl_get_owner,
|
||||
.lm_put_owner = nfsd4_fl_put_owner,
|
||||
};
|
||||
@ -5407,6 +5550,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
|
||||
lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
|
||||
if (!lo)
|
||||
return NULL;
|
||||
INIT_LIST_HEAD(&lo->lo_blocked);
|
||||
INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
|
||||
lo->lo_owner.so_is_open_owner = 0;
|
||||
lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
|
||||
@ -5588,12 +5732,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
struct nfs4_ol_stateid *open_stp = NULL;
|
||||
struct nfs4_file *fp;
|
||||
struct file *filp = NULL;
|
||||
struct nfsd4_blocked_lock *nbl = NULL;
|
||||
struct file_lock *file_lock = NULL;
|
||||
struct file_lock *conflock = NULL;
|
||||
__be32 status = 0;
|
||||
int lkflg;
|
||||
int err;
|
||||
bool new = false;
|
||||
unsigned char fl_type;
|
||||
unsigned int fl_flags = FL_POSIX;
|
||||
struct net *net = SVC_NET(rqstp);
|
||||
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
|
||||
|
||||
@ -5658,46 +5805,55 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
if (!locks_in_grace(net) && lock->lk_reclaim)
|
||||
goto out;
|
||||
|
||||
file_lock = locks_alloc_lock();
|
||||
if (!file_lock) {
|
||||
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
|
||||
status = nfserr_jukebox;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fp = lock_stp->st_stid.sc_file;
|
||||
switch (lock->lk_type) {
|
||||
case NFS4_READ_LT:
|
||||
case NFS4_READW_LT:
|
||||
if (nfsd4_has_session(cstate))
|
||||
fl_flags |= FL_SLEEP;
|
||||
/* Fallthrough */
|
||||
case NFS4_READ_LT:
|
||||
spin_lock(&fp->fi_lock);
|
||||
filp = find_readable_file_locked(fp);
|
||||
if (filp)
|
||||
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
|
||||
spin_unlock(&fp->fi_lock);
|
||||
file_lock->fl_type = F_RDLCK;
|
||||
fl_type = F_RDLCK;
|
||||
break;
|
||||
case NFS4_WRITE_LT:
|
||||
case NFS4_WRITEW_LT:
|
||||
if (nfsd4_has_session(cstate))
|
||||
fl_flags |= FL_SLEEP;
|
||||
/* Fallthrough */
|
||||
case NFS4_WRITE_LT:
|
||||
spin_lock(&fp->fi_lock);
|
||||
filp = find_writeable_file_locked(fp);
|
||||
if (filp)
|
||||
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
|
||||
spin_unlock(&fp->fi_lock);
|
||||
file_lock->fl_type = F_WRLCK;
|
||||
fl_type = F_WRLCK;
|
||||
break;
|
||||
default:
|
||||
status = nfserr_inval;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!filp) {
|
||||
status = nfserr_openmode;
|
||||
goto out;
|
||||
}
|
||||
|
||||
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
|
||||
if (!nbl) {
|
||||
dprintk("NFSD: %s: unable to allocate block!\n", __func__);
|
||||
status = nfserr_jukebox;
|
||||
goto out;
|
||||
}
|
||||
|
||||
file_lock = &nbl->nbl_lock;
|
||||
file_lock->fl_type = fl_type;
|
||||
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
|
||||
file_lock->fl_pid = current->tgid;
|
||||
file_lock->fl_file = filp;
|
||||
file_lock->fl_flags = FL_POSIX;
|
||||
file_lock->fl_flags = fl_flags;
|
||||
file_lock->fl_lmops = &nfsd_posix_mng_ops;
|
||||
file_lock->fl_start = lock->lk_offset;
|
||||
file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
|
||||
@ -5710,18 +5866,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fl_flags & FL_SLEEP) {
	/*
	 * Stamp the request in seconds, not jiffies: nfs4_laundromat()
	 * ages entries on blocked_locks_lru by comparing nbl_time with a
	 * cutoff derived from get_seconds(), so the two values must be
	 * in the same unit.
	 */
	nbl->nbl_time = get_seconds();
	spin_lock(&nn->client_lock);
	list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
	list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
	spin_unlock(&nn->client_lock);
}
|
||||
|
||||
err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
|
||||
switch (-err) {
|
||||
switch (err) {
|
||||
case 0: /* success! */
|
||||
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
|
||||
status = 0;
|
||||
break;
|
||||
case (EAGAIN): /* conflock holds conflicting lock */
|
||||
case FILE_LOCK_DEFERRED:
|
||||
nbl = NULL;
|
||||
/* Fallthrough */
|
||||
case -EAGAIN: /* conflock holds conflicting lock */
|
||||
status = nfserr_denied;
|
||||
dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
|
||||
nfs4_set_lock_denied(conflock, &lock->lk_denied);
|
||||
break;
|
||||
case (EDEADLK):
|
||||
case -EDEADLK:
|
||||
status = nfserr_deadlock;
|
||||
break;
|
||||
default:
|
||||
@ -5730,6 +5897,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
|
||||
break;
|
||||
}
|
||||
out:
|
||||
if (nbl) {
|
||||
/* dequeue it if we queued it before */
|
||||
if (fl_flags & FL_SLEEP) {
|
||||
spin_lock(&nn->client_lock);
|
||||
list_del_init(&nbl->nbl_list);
|
||||
list_del_init(&nbl->nbl_lru);
|
||||
spin_unlock(&nn->client_lock);
|
||||
}
|
||||
free_blocked_lock(nbl);
|
||||
}
|
||||
if (filp)
|
||||
fput(filp);
|
||||
if (lock_stp) {
|
||||
@ -5753,8 +5930,6 @@ out:
|
||||
if (open_stp)
|
||||
nfs4_put_stid(&open_stp->st_stid);
|
||||
nfsd4_bump_seqid(cstate, status);
|
||||
if (file_lock)
|
||||
locks_free_lock(file_lock);
|
||||
if (conflock)
|
||||
locks_free_lock(conflock);
|
||||
return status;
|
||||
@ -6768,6 +6943,7 @@ static int nfs4_state_create_net(struct net *net)
|
||||
INIT_LIST_HEAD(&nn->client_lru);
|
||||
INIT_LIST_HEAD(&nn->close_lru);
|
||||
INIT_LIST_HEAD(&nn->del_recall_lru);
|
||||
INIT_LIST_HEAD(&nn->blocked_locks_lru);
|
||||
spin_lock_init(&nn->client_lock);
|
||||
|
||||
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
|
||||
@ -6865,6 +7041,7 @@ nfs4_state_shutdown_net(struct net *net)
|
||||
struct nfs4_delegation *dp = NULL;
|
||||
struct list_head *pos, *next, reaplist;
|
||||
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
|
||||
struct nfsd4_blocked_lock *nbl;
|
||||
|
||||
cancel_delayed_work_sync(&nn->laundromat_work);
|
||||
locks_end_grace(&nn->nfsd4_manager);
|
||||
@ -6885,6 +7062,24 @@ nfs4_state_shutdown_net(struct net *net)
|
||||
nfs4_put_stid(&dp->dl_stid);
|
||||
}
|
||||
|
||||
BUG_ON(!list_empty(&reaplist));
|
||||
spin_lock(&nn->client_lock);
|
||||
while (!list_empty(&nn->blocked_locks_lru)) {
|
||||
nbl = list_first_entry(&nn->blocked_locks_lru,
|
||||
struct nfsd4_blocked_lock, nbl_lru);
|
||||
list_move(&nbl->nbl_lru, &reaplist);
|
||||
list_del_init(&nbl->nbl_list);
|
||||
}
|
||||
spin_unlock(&nn->client_lock);
|
||||
|
||||
/*
 * Dispose of the blocked locks moved onto reaplist above.
 *
 * Entries must be taken from reaplist itself. nn->blocked_locks_lru has
 * just been emptied by the loop above, so list_first_entry() on it would
 * yield a bogus pointer computed from the list head, which would then be
 * unblocked and freed, while reaplist would never drain.
 */
while (!list_empty(&reaplist)) {
	nbl = list_first_entry(&reaplist,
				struct nfsd4_blocked_lock, nbl_lru);
	list_del_init(&nbl->nbl_lru);
	posix_unblock_lock(&nbl->nbl_lock);
	free_blocked_lock(nbl);
}
|
||||
|
||||
nfsd4_client_tracking_exit(net);
|
||||
nfs4_state_destroy_net(net);
|
||||
}
|
||||
|
@ -1693,6 +1693,30 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
|
||||
DECODE_TAIL;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
|
||||
{
|
||||
DECODE_HEAD;
|
||||
unsigned int tmp;
|
||||
|
||||
status = nfsd4_decode_stateid(argp, ©->cp_src_stateid);
|
||||
if (status)
|
||||
return status;
|
||||
status = nfsd4_decode_stateid(argp, ©->cp_dst_stateid);
|
||||
if (status)
|
||||
return status;
|
||||
|
||||
READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
|
||||
p = xdr_decode_hyper(p, ©->cp_src_pos);
|
||||
p = xdr_decode_hyper(p, ©->cp_dst_pos);
|
||||
p = xdr_decode_hyper(p, ©->cp_count);
|
||||
copy->cp_consecutive = be32_to_cpup(p++);
|
||||
copy->cp_synchronous = be32_to_cpup(p++);
|
||||
tmp = be32_to_cpup(p); /* Source server list not supported */
|
||||
|
||||
DECODE_TAIL;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
|
||||
{
|
||||
@ -1793,7 +1817,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
|
||||
|
||||
/* new operations for NFSv4.2 */
|
||||
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
|
||||
[OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
|
||||
[OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
|
||||
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
|
||||
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
|
||||
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
|
||||
@ -4062,7 +4086,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
|
||||
u32 starting_len = xdr->buf->len, needed_len;
|
||||
__be32 *p;
|
||||
|
||||
dprintk("%s: err %d\n", __func__, nfserr);
|
||||
dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr));
|
||||
if (nfserr)
|
||||
goto out;
|
||||
|
||||
@ -4201,6 +4225,41 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
|
||||
}
|
||||
#endif /* CONFIG_NFSD_PNFS */
|
||||
|
||||
static __be32
|
||||
nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
|
||||
if (!p)
|
||||
return nfserr_resource;
|
||||
|
||||
*p++ = cpu_to_be32(0);
|
||||
p = xdr_encode_hyper(p, write->wr_bytes_written);
|
||||
*p++ = cpu_to_be32(write->wr_stable_how);
|
||||
p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
|
||||
NFS4_VERIFIER_SIZE);
|
||||
return nfs_ok;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
|
||||
struct nfsd4_copy *copy)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
if (!nfserr) {
|
||||
nfserr = nfsd42_encode_write_res(resp, ©->cp_res);
|
||||
if (nfserr)
|
||||
return nfserr;
|
||||
|
||||
p = xdr_reserve_space(&resp->xdr, 4 + 4);
|
||||
*p++ = cpu_to_be32(copy->cp_consecutive);
|
||||
*p++ = cpu_to_be32(copy->cp_synchronous);
|
||||
}
|
||||
return nfserr;
|
||||
}
|
||||
|
||||
static __be32
|
||||
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
|
||||
struct nfsd4_seek *seek)
|
||||
@ -4300,7 +4359,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
|
||||
|
||||
/* NFSv4.2 operations */
|
||||
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
|
||||
[OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
|
||||
[OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
|
||||
[OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
|
||||
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
|
||||
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
|
||||
|
@ -1216,6 +1216,8 @@ static __net_init int nfsd_init_net(struct net *net)
|
||||
goto out_idmap_error;
|
||||
nn->nfsd4_lease = 90; /* default lease time */
|
||||
nn->nfsd4_grace = 90;
|
||||
nn->clverifier_counter = prandom_u32();
|
||||
nn->clientid_counter = prandom_u32();
|
||||
return 0;
|
||||
|
||||
out_idmap_error:
|
||||
|
@ -789,6 +789,7 @@ nfserrno (int errno)
|
||||
{ nfserr_toosmall, -ETOOSMALL },
|
||||
{ nfserr_serverfault, -ESERVERFAULT },
|
||||
{ nfserr_serverfault, -ENFILE },
|
||||
{ nfserr_io, -EUCLEAN },
|
||||
};
|
||||
int i;
|
||||
|
||||
@ -796,7 +797,7 @@ nfserrno (int errno)
|
||||
if (nfs_errtbl[i].syserr == errno)
|
||||
return nfs_errtbl[i].nfserr;
|
||||
}
|
||||
WARN(1, "nfsd: non-standard errno: %d\n", errno);
|
||||
WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
|
||||
return nfserr_io;
|
||||
}
|
||||
|
||||
|
@ -366,14 +366,21 @@ static struct notifier_block nfsd_inet6addr_notifier = {
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Only used under nfsd_mutex, so this atomic may be overkill: */
|
||||
static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
|
||||
|
||||
static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
|
||||
{
|
||||
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
|
||||
|
||||
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
|
||||
/* check if the notifier still has clients */
|
||||
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
|
||||
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
|
||||
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* write_ports can create the server without actually starting
|
||||
* any threads--if we get shut down before any threads are
|
||||
@ -488,10 +495,13 @@ int nfsd_create_serv(struct net *net)
|
||||
}
|
||||
|
||||
set_max_drc();
|
||||
register_inetaddr_notifier(&nfsd_inetaddr_notifier);
|
||||
/* check if the notifier is already set */
|
||||
if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
|
||||
register_inetaddr_notifier(&nfsd_inetaddr_notifier);
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
|
||||
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
|
||||
#endif
|
||||
}
|
||||
do_gettimeofday(&nn->nfssvc_boot); /* record boot time */
|
||||
return 0;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ struct nfsd4_deviceid_map {
|
||||
|
||||
struct nfsd4_layout_ops {
|
||||
u32 notify_types;
|
||||
bool disable_recalls;
|
||||
|
||||
__be32 (*proc_getdeviceinfo)(struct super_block *sb,
|
||||
struct svc_rqst *rqstp,
|
||||
|
@ -63,7 +63,6 @@ typedef struct {
|
||||
|
||||
struct nfsd4_callback {
|
||||
struct nfs4_client *cb_clp;
|
||||
u32 cb_minorversion;
|
||||
struct rpc_message cb_msg;
|
||||
const struct nfsd4_callback_ops *cb_ops;
|
||||
struct work_struct cb_work;
|
||||
@ -441,11 +440,11 @@ struct nfs4_openowner {
|
||||
/*
|
||||
* Represents a generic "lockowner". Similar to an openowner. References to it
|
||||
* are held by the lock stateids that are created on its behalf. This object is
|
||||
* a superset of the nfs4_stateowner struct (or would be if it needed any extra
|
||||
* fields).
|
||||
* a superset of the nfs4_stateowner struct.
|
||||
*/
|
||||
struct nfs4_lockowner {
|
||||
struct nfs4_stateowner lo_owner; /* must be first element */
|
||||
struct nfs4_stateowner lo_owner; /* must be first element */
|
||||
struct list_head lo_blocked; /* blocked file_locks */
|
||||
};
|
||||
|
||||
static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
|
||||
@ -572,6 +571,7 @@ enum nfsd4_cb_op {
|
||||
NFSPROC4_CLNT_CB_RECALL,
|
||||
NFSPROC4_CLNT_CB_LAYOUT,
|
||||
NFSPROC4_CLNT_CB_SEQUENCE,
|
||||
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
|
||||
};
|
||||
|
||||
/* Returns true iff a is later than b: */
|
||||
@ -580,6 +580,20 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
|
||||
return (s32)(a->si_generation - b->si_generation) > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* When a client tries to get a lock on a file, we set one of these objects
|
||||
* on the blocking lock. When the lock becomes free, we can then issue a
|
||||
* CB_NOTIFY_LOCK to the client.
|
||||
*/
|
||||
struct nfsd4_blocked_lock {
|
||||
struct list_head nbl_list;
|
||||
struct list_head nbl_lru;
|
||||
unsigned long nbl_time;
|
||||
struct file_lock nbl_lock;
|
||||
struct knfsd_fh nbl_fh;
|
||||
struct nfsd4_callback nbl_cb;
|
||||
};
|
||||
|
||||
struct nfsd4_compound_state;
|
||||
struct nfsd_net;
|
||||
|
||||
|
@ -513,6 +513,22 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
|
||||
count));
|
||||
}
|
||||
|
||||
/*
 * Copy up to @count bytes from @src at @src_pos to @dst at @dst_pos.
 *
 * The request is clamped to 4MB before being handed to
 * vfs_copy_file_range(), whose return value is passed straight back to
 * the caller.
 */
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
			     u64 dst_pos, u64 count)
{
	/*
	 * Cap a single copy at 4MB so that one request cannot tie up an
	 * nfsd thread and a client rpc slot indefinitely.  The 4MB figure
	 * is somewhat arbitrary: alternatives would be to derive it from
	 * r/wsize, make it tunable, bound by time rather than bytes, or
	 * implement asynchronous copy.  In theory a client could also
	 * notice the limit and pipeline several COPY requests.
	 */
	const u64 max_copy = 1 << 22;

	if (count > max_copy)
		count = max_copy;

	return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
}
|
||||
|
||||
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
|
||||
struct file *file, loff_t offset, loff_t len,
|
||||
int flags)
|
||||
|
@ -96,6 +96,8 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
|
||||
struct svc_fh *res);
|
||||
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
|
||||
char *, int, struct svc_fh *);
|
||||
ssize_t nfsd_copy_file_range(struct file *, u64,
|
||||
struct file *, u64, u64);
|
||||
__be32 nfsd_rename(struct svc_rqst *,
|
||||
struct svc_fh *, char *, int,
|
||||
struct svc_fh *, char *, int);
|
||||
|
@ -503,6 +503,28 @@ struct nfsd4_clone {
|
||||
u64 cl_count;
|
||||
};
|
||||
|
||||
struct nfsd42_write_res {
|
||||
u64 wr_bytes_written;
|
||||
u32 wr_stable_how;
|
||||
nfs4_verifier wr_verifier;
|
||||
};
|
||||
|
||||
struct nfsd4_copy {
|
||||
/* request */
|
||||
stateid_t cp_src_stateid;
|
||||
stateid_t cp_dst_stateid;
|
||||
u64 cp_src_pos;
|
||||
u64 cp_dst_pos;
|
||||
u64 cp_count;
|
||||
|
||||
/* both */
|
||||
bool cp_consecutive;
|
||||
bool cp_synchronous;
|
||||
|
||||
/* response */
|
||||
struct nfsd42_write_res cp_res;
|
||||
};
|
||||
|
||||
struct nfsd4_seek {
|
||||
/* request */
|
||||
stateid_t seek_stateid;
|
||||
@ -568,6 +590,7 @@ struct nfsd4_op {
|
||||
struct nfsd4_fallocate allocate;
|
||||
struct nfsd4_fallocate deallocate;
|
||||
struct nfsd4_clone clone;
|
||||
struct nfsd4_copy copy;
|
||||
struct nfsd4_seek seek;
|
||||
} u;
|
||||
struct nfs4_replay * replay;
|
||||
|
@ -28,3 +28,12 @@
|
||||
#define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \
|
||||
cb_sequence_dec_sz + \
|
||||
op_dec_sz)
|
||||
|
||||
#define NFS4_enc_cb_notify_lock_sz (cb_compound_enc_hdr_sz + \
|
||||
cb_sequence_enc_sz + \
|
||||
2 + 1 + \
|
||||
XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
|
||||
enc_nfs4_fh_sz)
|
||||
#define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \
|
||||
cb_sequence_dec_sz + \
|
||||
op_dec_sz)
|
||||
|
@ -157,12 +157,13 @@ struct fid {
|
||||
* @fh_to_dentry is given a &struct super_block (@sb) and a file handle
|
||||
* fragment (@fh, @fh_len). It should return a &struct dentry which refers
|
||||
* to the same file that the file handle fragment refers to. If it cannot,
|
||||
* it should return a %NULL pointer if the file was found but no acceptable
|
||||
* &dentries were available, or an %ERR_PTR error code indicating why it
|
||||
* couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be
|
||||
* returned including, if necessary, a new dentry created with d_alloc_root.
|
||||
* The caller can then find any other extant dentries by following the
|
||||
* d_alias links.
|
||||
* it should return a %NULL pointer if the file cannot be found, or an
|
||||
* %ERR_PTR error code of %ENOMEM if a memory allocation failure occurred.
|
||||
* Any other error code is treated like %NULL, and will cause an %ESTALE error
|
||||
* for callers of exportfs_decode_fh().
|
||||
* Any suitable dentry can be returned including, if necessary, a new dentry
|
||||
* created with d_alloc_root. The caller can then find any other extant
|
||||
* dentries by following the d_alias links.
|
||||
*
|
||||
* fh_to_parent:
|
||||
* Same as @fh_to_dentry, except that it returns a pointer to the parent
|
||||
|
@ -41,6 +41,7 @@
|
||||
#define _LINUX_SUNRPC_RPC_RDMA_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#define RPCRDMA_VERSION 1
|
||||
#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION)
|
||||
@ -129,4 +130,38 @@ enum rpcrdma_proc {
|
||||
#define rdma_done cpu_to_be32(RDMA_DONE)
|
||||
#define rdma_error cpu_to_be32(RDMA_ERROR)
|
||||
|
||||
/*
|
||||
* Private extension to RPC-over-RDMA Version One.
|
||||
* Message passed during RDMA-CM connection set-up.
|
||||
*
|
||||
* Add new fields at the end, and don't permute existing
|
||||
* fields.
|
||||
*/
|
||||
struct rpcrdma_connect_private {
|
||||
__be32 cp_magic;
|
||||
u8 cp_version;
|
||||
u8 cp_flags;
|
||||
u8 cp_send_size;
|
||||
u8 cp_recv_size;
|
||||
} __packed;
|
||||
|
||||
#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18)
|
||||
|
||||
enum {
|
||||
RPCRDMA_CMP_VERSION = 1,
|
||||
RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0),
|
||||
};
|
||||
|
||||
static inline u8
|
||||
rpcrdma_encode_buffer_size(unsigned int size)
|
||||
{
|
||||
return (size >> 10) - 1;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
rpcrdma_decode_buffer_size(u8 val)
|
||||
{
|
||||
return ((unsigned int)val + 1) << 10;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SUNRPC_RPC_RDMA_H */
|
||||
|
@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt {
|
||||
unsigned long flags;
|
||||
enum dma_data_direction direction;
|
||||
int count;
|
||||
unsigned int mapped_sges;
|
||||
struct ib_sge sge[RPCSVC_MAXPAGES];
|
||||
struct page *pages[RPCSVC_MAXPAGES];
|
||||
};
|
||||
@ -136,6 +137,7 @@ struct svcxprt_rdma {
|
||||
int sc_ord; /* RDMA read limit */
|
||||
int sc_max_sge;
|
||||
int sc_max_sge_rd; /* max sge for read target */
|
||||
bool sc_snd_w_inv; /* OK to use Send With Invalidate */
|
||||
|
||||
atomic_t sc_sq_count; /* Number of SQ WR on queue */
|
||||
unsigned int sc_sq_depth; /* Depth of SQ */
|
||||
@ -193,6 +195,14 @@ struct svcxprt_rdma {
|
||||
|
||||
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
|
||||
|
||||
/* Track DMA maps for this transport and context */
|
||||
static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
|
||||
struct svc_rdma_op_ctxt *ctxt)
|
||||
{
|
||||
ctxt->mapped_sges++;
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
}
|
||||
|
||||
/* svc_rdma_backchannel.c */
|
||||
extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
|
||||
struct rpcrdma_msg *rmsgp,
|
||||
|
@ -39,8 +39,9 @@
|
||||
#define NFS4_FH_VOL_MIGRATION 0x0004
|
||||
#define NFS4_FH_VOL_RENAME 0x0008
|
||||
|
||||
#define NFS4_OPEN_RESULT_CONFIRM 0x0002
|
||||
#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
|
||||
#define NFS4_OPEN_RESULT_CONFIRM 0x0002
|
||||
#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
|
||||
#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020
|
||||
|
||||
#define NFS4_SHARE_ACCESS_MASK 0x000F
|
||||
#define NFS4_SHARE_ACCESS_READ 0x0001
|
||||
|
@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
|
||||
ret = -EIO;
|
||||
goto out_unmap;
|
||||
}
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
svc_rdma_count_mappings(rdma, ctxt);
|
||||
|
||||
memset(&send_wr, 0, sizeof(send_wr));
|
||||
ctxt->cqe.done = svc_rdma_wc_send;
|
||||
|
@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
|
||||
ctxt->sge[pno].addr);
|
||||
if (ret)
|
||||
goto err;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
svc_rdma_count_mappings(xprt, ctxt);
|
||||
|
||||
ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
|
||||
ctxt->sge[pno].length = len;
|
||||
|
@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
|
||||
return rp_ary;
|
||||
}
|
||||
|
||||
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
|
||||
* Responder's choice: requester signals it can handle Send With
|
||||
* Invalidate, and responder chooses one rkey to invalidate.
|
||||
*
|
||||
* Find a candidate rkey to invalidate when sending a reply. Picks the
|
||||
* first rkey it finds in the chunks lists.
|
||||
*
|
||||
* Returns zero if RPC's chunk lists are empty.
|
||||
*/
|
||||
static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
|
||||
struct rpcrdma_write_array *wr_ary,
|
||||
struct rpcrdma_write_array *rp_ary)
|
||||
{
|
||||
struct rpcrdma_read_chunk *rd_ary;
|
||||
struct rpcrdma_segment *arg_ch;
|
||||
u32 inv_rkey;
|
||||
|
||||
inv_rkey = 0;
|
||||
|
||||
rd_ary = svc_rdma_get_read_chunk(rdma_argp);
|
||||
if (rd_ary) {
|
||||
inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
|
||||
arg_ch = &wr_ary->wc_array[0].wc_target;
|
||||
inv_rkey = be32_to_cpu(arg_ch->rs_handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
|
||||
arg_ch = &rp_ary->wc_array[0].wc_target;
|
||||
inv_rkey = be32_to_cpu(arg_ch->rs_handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
|
||||
return inv_rkey;
|
||||
}
|
||||
|
||||
/* Assumptions:
|
||||
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
|
||||
*/
|
||||
@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
sge[sge_no].addr))
|
||||
goto err;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
svc_rdma_count_mappings(xprt, ctxt);
|
||||
sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
|
||||
ctxt->count++;
|
||||
sge_off = 0;
|
||||
@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
struct page *page,
|
||||
struct rpcrdma_msg *rdma_resp,
|
||||
struct svc_rdma_req_map *vec,
|
||||
int byte_count)
|
||||
int byte_count,
|
||||
u32 inv_rkey)
|
||||
{
|
||||
struct svc_rdma_op_ctxt *ctxt;
|
||||
struct ib_send_wr send_wr;
|
||||
@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
ctxt->sge[0].length, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
|
||||
goto err;
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
svc_rdma_count_mappings(rdma, ctxt);
|
||||
|
||||
ctxt->direction = DMA_TO_DEVICE;
|
||||
|
||||
@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
|
||||
ctxt->sge[sge_no].addr))
|
||||
goto err;
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
svc_rdma_count_mappings(rdma, ctxt);
|
||||
ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
|
||||
ctxt->sge[sge_no].length = sge_bytes;
|
||||
}
|
||||
@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
|
||||
ctxt->count++;
|
||||
rqstp->rq_respages[page_no] = NULL;
|
||||
/*
|
||||
* If there are more pages than SGE, terminate SGE
|
||||
* list so that svc_rdma_unmap_dma doesn't attempt to
|
||||
* unmap garbage.
|
||||
*/
|
||||
if (page_no+1 >= sge_no)
|
||||
ctxt->sge[page_no+1].length = 0;
|
||||
}
|
||||
rqstp->rq_next_page = rqstp->rq_respages + 1;
|
||||
|
||||
/* The loop above bumps sc_dma_used for each sge. The
|
||||
* xdr_buf.tail gets a separate sge, but resides in the
|
||||
* same page as xdr_buf.head. Don't count it twice.
|
||||
*/
|
||||
if (sge_no > ctxt->count)
|
||||
atomic_dec(&rdma->sc_dma_used);
|
||||
|
||||
if (sge_no > rdma->sc_max_sge) {
|
||||
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
|
||||
goto err;
|
||||
@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
send_wr.wr_cqe = &ctxt->cqe;
|
||||
send_wr.sg_list = ctxt->sge;
|
||||
send_wr.num_sge = sge_no;
|
||||
send_wr.opcode = IB_WR_SEND;
|
||||
if (inv_rkey) {
|
||||
send_wr.opcode = IB_WR_SEND_WITH_INV;
|
||||
send_wr.ex.invalidate_rkey = inv_rkey;
|
||||
} else
|
||||
send_wr.opcode = IB_WR_SEND;
|
||||
send_wr.send_flags = IB_SEND_SIGNALED;
|
||||
|
||||
ret = svc_rdma_send(rdma, &send_wr);
|
||||
@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
int inline_bytes;
|
||||
struct page *res_page;
|
||||
struct svc_rdma_req_map *vec;
|
||||
u32 inv_rkey;
|
||||
|
||||
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
|
||||
|
||||
@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
wr_ary = svc_rdma_get_write_array(rdma_argp);
|
||||
rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
|
||||
|
||||
inv_rkey = 0;
|
||||
if (rdma->sc_snd_w_inv)
|
||||
inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
|
||||
|
||||
/* Build an req vec for the XDR */
|
||||
vec = svc_rdma_get_req_map(rdma);
|
||||
ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
|
||||
@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
goto err1;
|
||||
|
||||
ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
|
||||
inline_bytes);
|
||||
inline_bytes, inv_rkey);
|
||||
if (ret < 0)
|
||||
goto err1;
|
||||
goto err0;
|
||||
|
||||
svc_rdma_put_req_map(rdma, vec);
|
||||
dprintk("svcrdma: send_reply returns %d\n", ret);
|
||||
@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
return;
|
||||
}
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
svc_rdma_count_mappings(xprt, ctxt);
|
||||
|
||||
/* Prepare SEND WR */
|
||||
memset(&err_wr, 0, sizeof(err_wr));
|
||||
|
@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
|
||||
|
||||
out:
|
||||
ctxt->count = 0;
|
||||
ctxt->mapped_sges = 0;
|
||||
ctxt->frmr = NULL;
|
||||
return ctxt;
|
||||
|
||||
@ -221,22 +222,27 @@ out_empty:
|
||||
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
|
||||
{
|
||||
struct svcxprt_rdma *xprt = ctxt->xprt;
|
||||
int i;
|
||||
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
|
||||
struct ib_device *device = xprt->sc_cm_id->device;
|
||||
u32 lkey = xprt->sc_pd->local_dma_lkey;
|
||||
unsigned int i, count;
|
||||
|
||||
for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
|
||||
/*
|
||||
* Unmap the DMA addr in the SGE if the lkey matches
|
||||
* the local_dma_lkey, otherwise, ignore it since it is
|
||||
* an FRMR lkey and will be unmapped later when the
|
||||
* last WR that uses it completes.
|
||||
*/
|
||||
if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
|
||||
atomic_dec(&xprt->sc_dma_used);
|
||||
ib_dma_unmap_page(xprt->sc_cm_id->device,
|
||||
if (ctxt->sge[i].lkey == lkey) {
|
||||
count++;
|
||||
ib_dma_unmap_page(device,
|
||||
ctxt->sge[i].addr,
|
||||
ctxt->sge[i].length,
|
||||
ctxt->direction);
|
||||
}
|
||||
}
|
||||
ctxt->mapped_sges = 0;
|
||||
atomic_sub(count, &xprt->sc_dma_used);
|
||||
}
|
||||
|
||||
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
|
||||
@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
|
||||
DMA_FROM_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
|
||||
goto err_put_ctxt;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
svc_rdma_count_mappings(xprt, ctxt);
|
||||
ctxt->sge[sge_no].addr = pa;
|
||||
ctxt->sge[sge_no].length = PAGE_SIZE;
|
||||
ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
|
||||
@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
|
||||
struct rdma_conn_param *param)
|
||||
{
|
||||
const struct rpcrdma_connect_private *pmsg = param->private_data;
|
||||
|
||||
if (pmsg &&
|
||||
pmsg->cp_magic == rpcrdma_cmp_magic &&
|
||||
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
|
||||
newxprt->sc_snd_w_inv = pmsg->cp_flags &
|
||||
RPCRDMA_CMP_F_SND_W_INV_OK;
|
||||
|
||||
dprintk("svcrdma: client send_size %u, recv_size %u "
|
||||
"remote inv %ssupported\n",
|
||||
rpcrdma_decode_buffer_size(pmsg->cp_send_size),
|
||||
rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
|
||||
newxprt->sc_snd_w_inv ? "" : "un");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function handles the CONNECT_REQUEST event on a listening
|
||||
* endpoint. It is passed the cma_id for the _new_ connection. The context in
|
||||
@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
|
||||
* will call the recvfrom method on the listen xprt which will accept the new
|
||||
* connection.
|
||||
*/
|
||||
static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
|
||||
static void handle_connect_req(struct rdma_cm_id *new_cma_id,
|
||||
struct rdma_conn_param *param)
|
||||
{
|
||||
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
|
||||
struct svcxprt_rdma *newxprt;
|
||||
@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
|
||||
new_cma_id->context = newxprt;
|
||||
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
|
||||
newxprt, newxprt->sc_cm_id, listen_xprt);
|
||||
svc_rdma_parse_connect_private(newxprt, param);
|
||||
|
||||
/* Save client advertised inbound read limit for use later in accept. */
|
||||
newxprt->sc_ord = client_ird;
|
||||
newxprt->sc_ord = param->initiator_depth;
|
||||
|
||||
/* Set the local and remote addresses in the transport */
|
||||
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
|
||||
@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
|
||||
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
|
||||
"event = %s (%d)\n", cma_id, cma_id->context,
|
||||
rdma_event_msg(event->event), event->event);
|
||||
handle_connect_req(cma_id,
|
||||
event->param.conn.initiator_depth);
|
||||
handle_connect_req(cma_id, &event->param.conn);
|
||||
break;
|
||||
|
||||
case RDMA_CM_EVENT_ESTABLISHED:
|
||||
@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
||||
struct svcxprt_rdma *listen_rdma;
|
||||
struct svcxprt_rdma *newxprt = NULL;
|
||||
struct rdma_conn_param conn_param;
|
||||
struct rpcrdma_connect_private pmsg;
|
||||
struct ib_qp_init_attr qp_attr;
|
||||
struct ib_device *dev;
|
||||
unsigned int i;
|
||||
@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
||||
dev->attrs.max_fast_reg_page_list_len;
|
||||
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
|
||||
newxprt->sc_reader = rdma_read_chunk_frmr;
|
||||
}
|
||||
} else
|
||||
newxprt->sc_snd_w_inv = false;
|
||||
|
||||
/*
|
||||
* Determine if a DMA MR is required and if so, what privs are required
|
||||
@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
||||
/* Swap out the handler */
|
||||
newxprt->sc_cm_id->event_handler = rdma_cma_handler;
|
||||
|
||||
/* Construct RDMA-CM private message */
|
||||
pmsg.cp_magic = rpcrdma_cmp_magic;
|
||||
pmsg.cp_version = RPCRDMA_CMP_VERSION;
|
||||
pmsg.cp_flags = 0;
|
||||
pmsg.cp_send_size = pmsg.cp_recv_size =
|
||||
rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);
|
||||
|
||||
/* Accept Connection */
|
||||
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
|
||||
memset(&conn_param, 0, sizeof conn_param);
|
||||
conn_param.responder_resources = 0;
|
||||
conn_param.initiator_depth = newxprt->sc_ord;
|
||||
conn_param.private_data = &pmsg;
|
||||
conn_param.private_data_len = sizeof(pmsg);
|
||||
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
|
||||
if (ret) {
|
||||
dprintk("svcrdma: failed to accept new connection, ret=%d\n",
|
||||
|
Loading…
Reference in New Issue
Block a user