diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 33d588a01ace..cbe542ad5233 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -154,6 +154,11 @@ struct which has the following members: to find potential names, and matches inode numbers to find the correct match. + flags + Some filesystems may need to be handled differently than others. The + export_operations struct also includes a flags field that allows the + filesystem to communicate such information to nfsd. See the Export + Operations Flags section below for more explanation. A filehandle fragment consists of an array of 1 or more 4byte words, together with a one byte "type". @@ -163,3 +168,25 @@ generated by encode_fh, in which case it will have been padded with nuls. Rather, the encode_fh routine should choose a "type" which indicates the decode_fh how much of the filehandle is valid, and how it should be interpreted. + +Export Operations Flags +----------------------- +In addition to the operation vector pointers, struct export_operations also +contains a "flags" field that allows the filesystem to communicate to nfsd +that it may want to do things differently when dealing with it. The +following flags are defined: + + EXPORT_OP_NOWCC - disable NFSv3 WCC attributes on this filesystem + RFC 1813 recommends that servers always send weak cache consistency + (WCC) data to the client after each operation. The server should + atomically collect attributes about the inode, do an operation on it, + and then collect the attributes afterward. This allows the client to + skip issuing GETATTRs in some situations but means that the server + is calling vfs_getattr for almost all RPCs. On some filesystems + (particularly those that are clustered or networked) this is expensive + and atomicity is difficult to guarantee. This flag indicates to nfsd + that it should skip providing WCC attributes to the client in NFSv3 + replies when doing operations on this filesystem. Consider enabling + this on filesystems that have an expensive ->getattr inode operation, + or when atomicity between pre and post operation attribute collection + is impossible to guarantee. diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 3430d6891e89..8f4c528865c5 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -171,4 +171,5 @@ const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, + .flags = EXPORT_OP_NOWCC, }; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7d44e10a5f5d..34b880211e5e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -206,7 +206,7 @@ static __be32 * encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; - if (dentry && d_really_is_positive(dentry)) { + if (!fhp->fh_no_wcc && dentry && d_really_is_positive(dentry)) { __be32 err; struct kstat stat; @@ -262,7 +262,7 @@ void fill_pre_wcc(struct svc_fh *fhp) bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); __be32 err; - if (fhp->fh_pre_saved) + if (fhp->fh_no_wcc || fhp->fh_pre_saved) return; inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); @@ -290,6 +290,9 @@ void fill_post_wcc(struct svc_fh *fhp) struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; + if (fhp->fh_no_wcc) + return; + if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c81dbbad8792..9c29a523f484 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -291,6 +291,16 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; + + switch (rqstp->rq_vers) { + case 3: + if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) + fhp->fh_no_wcc = true; + break; + case 2: + fhp->fh_no_wcc = true; + } + return 0; out: exp_put(exp); @@ -559,6 +569,9 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ set_version_and_fsid_type(fhp, exp, ref_fh); + /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */ + fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false; + if (ref_fh == fhp) fh_put(ref_fh); @@ -662,6 +675,7 @@ fh_put(struct svc_fh *fhp) exp_put(exp); fhp->fh_export = NULL; } + fhp->fh_no_wcc = false; return; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 45bd776290d5..347d10aa6265 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -35,6 +35,7 @@ typedef struct svc_fh { bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ + bool fh_no_wcc; /* no wcc data needed */ int fh_flags; /* FH flags */ #ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ @@ -54,7 +55,6 @@ typedef struct svc_fh { struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; /* nfsv4 change; see above */ #endif /* CONFIG_NFSD_V3 */ - } svc_fh; #define NFSD4_FH_FOREIGN (1<<0) #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3ceb72b67a7a..e7de0103a32e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,6 +213,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); +#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ + unsigned long flags; }; extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,