From 9c75576e3bbf0153e92ab51ff161962d8632c290 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 20 Mar 2024 17:11:19 -0400 Subject: [PATCH 01/15] pNFS/filelayout: Remove the whole file layout requirement Layout segments have been supported in pNFS for years, so remove the requirement that the server always sends whole file layouts. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ce8f8934bca5..3fb18b16a5b4 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -605,14 +605,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - /* FIXME: remove this check when layout segment support is added */ - if (lgr->range.offset != 0 || - lgr->range.length != NFS4_MAX_UINT64) { - dprintk("%s Only whole file layouts supported. Use MDS i/o\n", - __func__); - goto out; - } - if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); From 464b424fb09b894f792a494f10539c190db503cf Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 20 Mar 2024 17:11:20 -0400 Subject: [PATCH 02/15] pNFS/filelayout: Specify the layout segment range in LAYOUTGET Move from only requesting full file layout segments to requesting layout segments that match our I/O size. This means the server is still free to return a full file layout if it wants, but partial layouts will no longer cause an error. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 3fb18b16a5b4..cc2ed4b5a4fd 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -871,8 +871,8 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_READ, false, GFP_KERNEL); @@ -895,8 +895,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_RW, false, GFP_NOFS); From 7c6c5249f061b64fc6b5b90bc147169a048691bf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 25 Mar 2024 16:36:05 +1100 Subject: [PATCH 03/15] NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly. With two clients, each with NFSv3 mounts of the same directory, the sequence: client1 client2 ls -l afile echo hello there > afile echo HELLO > afile cat afile will show HELLO there because the O_TRUNC requested in the final 'echo' doesn't take effect. This is because the "Negative dentry, just create a file" section in lookup_open() assumes that the file *does* get created since the dentry was negative, so it sets FMODE_CREATED, and this causes do_open() to clear O_TRUNC and so the file doesn't get truncated. Even mounting with -o lookupcache=none does not help as nfs_neg_need_reval() always returns false if LOOKUP_CREATE is set. This patch fixes the problem by providing an atomic_open inode operation for NFSv3 (and v2). The code is largely the code from the branch in lookup_open() when atomic_open is not provided. The significant change is that the O_TRUNC flag is passed a new nfs_do_create() which add 'trunc' handling to nfs_create(). With this change we also optimise away an unnecessary LOOKUP before the file is created. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 54 +++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs3proc.c | 1 + fs/nfs/proc.c | 1 + include/linux/nfs_fs.h | 3 +++ 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ac505671efbd..342930996226 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -56,6 +56,8 @@ static int nfs_readdir(struct file *, struct dir_context *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct folio *); +static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -2243,6 +2245,41 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) #endif /* CONFIG_NFSV4 */ +int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned int open_flags, + umode_t mode) +{ + + /* Same as look+open from lookup_open(), but with different O_TRUNC + * handling. + */ + int error = 0; + + if (open_flags & O_CREAT) { + file->f_mode |= FMODE_CREATED; + error = nfs_do_create(dir, dentry, mode, open_flags); + if (error) + return error; + return finish_open(file, dentry, NULL); + } else if (d_in_lookup(dentry)) { + /* The only flags nfs_lookup considers are + * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and + * we want those to be zero so the lookup isn't skipped. + */ + struct dentry *res = nfs_lookup(dir, dentry, 0); + + d_lookup_done(dentry); + if (unlikely(res)) { + if (IS_ERR(res)) + return PTR_ERR(res); + return finish_no_open(file, res); + } + } + return finish_no_open(file, NULL); + +} +EXPORT_SYMBOL_GPL(nfs_atomic_open_v23); + struct dentry * nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) @@ -2303,18 +2340,23 @@ EXPORT_SYMBOL_GPL(nfs_instantiate); * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -int nfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool excl) +static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags) { struct iattr attr; - int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; + open_flags |= O_CREAT; + dfprintk(VFS, "NFS: create(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + if (open_flags & O_TRUNC) { + attr.ia_size = 0; + attr.ia_valid |= ATTR_SIZE; + } trace_nfs_create_enter(dir, dentry, open_flags); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); @@ -2326,6 +2368,12 @@ out_err: d_drop(dentry); return error; } + +int nfs_create(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) +{ + return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0); +} EXPORT_SYMBOL_GPL(nfs_create); /* diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cbbe3f0193b8..74bda639a7cf 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -986,6 +986,7 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t flags) static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, + .atomic_open = nfs_atomic_open_v23, .lookup = nfs_lookup, .link = nfs_link, .unlink = nfs_unlink, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ad3a321ae997..d105e5b2659d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -695,6 +695,7 @@ static int nfs_have_delegation(struct inode *inode, fmode_t flags) static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open_v23, .link = nfs_link, .unlink = nfs_unlink, .symlink = nfs_symlink, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d59116ac8209..039898d70954 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -561,6 +561,9 @@ extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openfl extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block); +extern int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned int open_flags, + umode_t mode); /* * linux/fs/nfs/symlink.c From bf95f82e6a569f41bae1e37204b219a5e1e8b971 Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Tue, 2 Apr 2024 18:33:55 +0800 Subject: [PATCH 04/15] NFS: make sure lock/nolock overriding local_lock mount option Currently, mount option lock/nolock and local_lock option may override NFS_MOUNT_LOCAL_FLOCK NFS_MOUNT_LOCAL_FCNTL flags when passing in different order: mount -o vers=3,local_lock=all,lock: local_lock=none mount -o vers=3,lock,local_lock=all: local_lock=all This patch will let lock/nolock override local_lock option as nfs(5) suggested. Signed-off-by: Chen Hanxiao Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 2 ++ fs/nfs/internal.h | 7 +++++++ fs/nfs/super.c | 10 ++++++++++ 3 files changed, 19 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index d0a0956f8a13..ec93306b7e79 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -600,9 +600,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_lock: if (result.negated) { + ctx->lock_status = NFS_LOCK_NOLOCK; ctx->flags |= NFS_MOUNT_NONLM; ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { + ctx->lock_status = NFS_LOCK_LOCK; ctx->flags &= ~NFS_MOUNT_NONLM; ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 06253695fe53..dc0693b3d214 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -112,6 +112,7 @@ struct nfs_fs_context { unsigned short protofamily; unsigned short mountfamily; bool has_sec_mnt_opts; + int lock_status; struct { union { @@ -153,6 +154,12 @@ struct nfs_fs_context { } clone_data; }; +enum nfs_lock_status { + NFS_LOCK_NOT_SET = 0, + NFS_LOCK_LOCK = 1, + NFS_LOCK_NOLOCK = 2, +}; + #define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dc03f98f7616..cbbd4866b0b7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -901,6 +901,16 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); + /* make sure 'nolock'/'lock' override the 'local_lock' mount option */ + if (ctx->lock_status) { + if (ctx->lock_status == NFS_LOCK_NOLOCK) { + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } else { + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } + } status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); From 37ffe06537af3e3ec212e7cbe941046fce0a822f Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 17 Apr 2024 14:49:29 -0400 Subject: [PATCH 05/15] NFSv4: Fixup smatch warning for ambiguous return Dan Carpenter reports smatch warning for nfs4_try_migration() when a memory allocation failure results in a zero return value. In this case, a transient allocation failure error will likely be retried the next time the server responds with NFS4ERR_MOVED. We can fixup the smatch warning with a small refactor: attempt all three allocations before testing and returning on a failure. Reported-by: Dan Carpenter Fixes: c3ed222745d9 ("NFSv4: Fix free of uninitialized nfs4_label on referral lookup.") Signed-off-by: Benjamin Coddington Reviewed-by: Dan Carpenter Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 662e86ea3a2d..5b452411e8fd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2116,6 +2116,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred { struct nfs_client *clp = server->nfs_client; struct nfs4_fs_locations *locations = NULL; + struct nfs_fattr *fattr; struct inode *inode; struct page *page; int status, result; @@ -2125,19 +2126,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred (unsigned long long)server->fsid.minor, clp->cl_hostname); - result = 0; page = alloc_page(GFP_KERNEL); locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (page == NULL || locations == NULL) { - dprintk("<-- %s: no memory\n", __func__); - goto out; - } - locations->fattr = nfs_alloc_fattr(); - if (locations->fattr == NULL) { + fattr = nfs_alloc_fattr(); + if (page == NULL || locations == NULL || fattr == NULL) { dprintk("<-- %s: no memory\n", __func__); + result = 0; goto out; } + locations->fattr = fattr; inode = d_inode(server->super->s_root); result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, page, cred); From b322bf9e983addedff0894c55e92d58f4d16d92a Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 14 Apr 2024 19:01:09 +0200 Subject: [PATCH 06/15] nfs: keep server info for remounts With newer kernels that use fs_context for nfs mounts, remounts fail with -EINVAL. $ mount -t nfs -o nolock 10.0.0.1:/tmp/test /mnt/test/ $ mount -t nfs -o remount /mnt/test/ mount: mounting 10.0.0.1:/tmp/test on /mnt/test failed: Invalid argument For remounts, the nfs server address and port are populated by nfs_init_fs_context and later overwritten with 0x00 bytes by nfs23_parse_monolithic. The remount then fails as the server address is invalid. Fix this by not overwriting nfs server info in nfs23_parse_monolithic if we're doing a remount. Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Martin Kaiser Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index ec93306b7e79..6c9f3f6645dd 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -1114,9 +1114,12 @@ static int nfs23_parse_monolithic(struct fs_context *fc, ctx->acdirmax = data->acdirmax; ctx->need_mount = false; - memcpy(sap, &data->addr, sizeof(data->addr)); - ctx->nfs_server.addrlen = sizeof(data->addr); - ctx->nfs_server.port = ntohs(data->addr.sin_port); + if (!is_remount_fc(fc)) { + memcpy(sap, &data->addr, sizeof(data->addr)); + ctx->nfs_server.addrlen = sizeof(data->addr); + ctx->nfs_server.port = ntohs(data->addr.sin_port); + } + if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; From 9b62ef6d239ecccccb0c0262ab39cfcb6d8e40ae Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 18 Apr 2024 10:11:04 -0400 Subject: [PATCH 07/15] SUNRPC: fix handling expired GSS context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case where we have received a successful reply to an RPC request, but while processing the reply the client in rpc_decode_header() finds an expired context, the code ends up propagating the error to the caller instead of getting a new context and retrying the request. To give more details, in rpc_decode_header() we call rpcauth_checkverf() will call into the gss and internally will at some point call gss_validate() which has a check if the current’s context lifetime expired, and it would fail. The reason for the failure gets ‘scrubbed’ and translated to EACCES so when we get back to rpc_decode_header() we just go to “out_verifier” which for that error would get converted to “out_garbage” (ie it’s treated as garballed reply) and the next action is call_encode. Which (1) doesn’t reencode or re-send (not to mention no upcall happens because context expires as that reason just not known) and it again fails in the same decoding process. After re-trying it 3 times the error is propagated back to the caller (ie nfs4_write_done_cb() in the case a failing write). To fix this, instead we need to look to the case where the server decides that context has expired and replies with an RPC auth error. In that case, the rpc_decode_header() goes to "out_msg_denied" in that we return EKEYREJECTED which in call_decode() is sent to “call_reserve” which triggers an upcalls and a re-try of the operation. The proposed fix is in case of a failed rpc_decode_header() to check if credentials were set to be invalid and use that as a proxy for deciding that context has expired and then treat is same way as receiving an auth error. Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 28f3749f6dc6..f19ad55017c9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2698,8 +2698,19 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr) goto out_msg_denied; error = rpcauth_checkverf(task, xdr); - if (error) + if (error) { + struct rpc_cred *cred = task->tk_rqstp->rq_cred; + + if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { + rpcauth_invalcred(task); + if (!task->tk_cred_retry) + goto out_err; + task->tk_cred_retry--; + trace_rpc__stale_creds(task); + return -EKEYREJECTED; + } goto out_verifier; + } p = xdr_inline_decode(xdr, sizeof(*p)); if (!p) From 0dc9f430027b8bd9073fdafdfcdeb1a073ab5594 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Thu, 25 Apr 2024 13:49:38 +0300 Subject: [PATCH 08/15] sunrpc: fix NFSACL RPC retry on soft mount It used to be quite awhile ago since 1b63a75180c6 ('SUNRPC: Refactor rpc_clone_client()'), in 2012, that `cl_timeout` was copied in so that all mount parameters propagate to NFSACL clients. However since that change, if mount options as follows are given: soft,timeo=50,retrans=16,vers=3 The resultant NFSACL client receives: cl_softrtry: 1 cl_timeout: to_initval=60000, to_maxval=60000, to_increment=0, to_retries=2, to_exponential=0 These values lead to NFSACL operations not being retried under the condition of transient network outages with soft mount. Instead, getacl call fails after 60 seconds with EIO. The simple fix is to pass the existing client's `cl_timeout` as the new client timeout. Cc: Chuck Lever Cc: Benjamin Coddington Link: https://lore.kernel.org/all/20231105154857.ryakhmgaptq3hb6b@gmail.com/T/ Fixes: 1b63a75180c6 ('SUNRPC: Refactor rpc_clone_client()') Signed-off-by: Dan Aloni Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f19ad55017c9..cfd1b1bf7e35 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1071,6 +1071,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .authflavor = old->cl_auth->au_flavor, .cred = old->cl_cred, .stats = old->cl_stats, + .timeout = old->cl_timeout, }; struct rpc_clnt *clnt; int err; From f06d1b10cb016d5aaecdb1804fefca025387bd10 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 25 Apr 2024 16:24:29 -0400 Subject: [PATCH 09/15] NFS: Fix READ_PLUS when server doesn't support OP_READ_PLUS Olga showed me a case where the client was sending multiple READ_PLUS calls to the server in parallel, and the server replied NFS4ERR_OPNOTSUPP to each. The client would fall back to READ for the first reply, but fail to retry the other calls. I fix this by removing the test for NFS_CAP_READ_PLUS in nfs4_read_plus_not_supported(). This allows us to reschedule any READ_PLUS call that has a NFS4ERR_OPNOTSUPP return value, even after the capability has been cleared. Reported-by: Olga Kornievskaia Fixes: c567552612ec ("NFS: Add READ_PLUS data segment support") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Anna Schumaker Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ea390db94b62..c93c12063b3a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5456,7 +5456,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, struct rpc_message *msg = &task->tk_msg; if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && - server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + task->tk_status == -ENOTSUPP) { server->caps &= ~NFS_CAP_READ_PLUS; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; rpc_restart_call_prepare(task); From d1404e46ae4688c74e7504195f4fe253bcce1522 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 25 Apr 2024 16:34:40 -0400 Subject: [PATCH 10/15] NFS: Don't enable NFS v2 by default This came up during one of the Bake-a-thon discussions. NFS v2 support was dropped from nfs-utils/mount.nfs in December 2021. Let's turn it off by default in the kernel too, since this means there isn't a way to mount and test it. Signed-off-by: Anna Schumaker Reviewed-by: Jeffrey Layton Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f7e32d76e34d..57249f040dfc 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -33,12 +33,12 @@ config NFS_FS config NFS_V2 tristate "NFS client support for NFS version 2" depends on NFS_FS - default y + default n help This option enables support for version 2 of the NFS protocol (RFC 1094) in the kernel's NFS client. - If unsure, say Y. + If unsure, say N. config NFS_V3 tristate "NFS client support for NFS version 3" From 4836da219781ec510c4c0303df901aa643507a7a Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Mon, 6 May 2024 12:37:59 +0300 Subject: [PATCH 11/15] rpcrdma: fix handling for RDMA_CM_EVENT_DEVICE_REMOVAL Under the scenario of IB device bonding, when bringing down one of the ports, or all ports, we saw xprtrdma entering a non-recoverable state where it is not even possible to complete the disconnect and shut it down the mount, requiring a reboot. Following debug, we saw that transport connect never ended after receiving the RDMA_CM_EVENT_DEVICE_REMOVAL callback. The DEVICE_REMOVAL callback is irrespective of whether the CM_ID is connected, and ESTABLISHED may not have happened. So need to work with each of these states accordingly. Fixes: 2acc5cae2923 ('xprtrdma: Prevent dereferencing r_xprt->rx_ep after it is freed') Cc: Sagi Grimberg Signed-off-by: Dan Aloni Reviewed-by: Sagi Grimberg Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4f8d7efa469f..432557a553e7 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: pr_info("rpcrdma: removing device %s for %pISpc\n", ep->re_id->device->name, sap); - fallthrough; + switch (xchg(&ep->re_connect_status, -ENODEV)) { + case 0: goto wake_connect_worker; + case 1: goto disconnected; + } + return 0; case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; goto disconnected; From 3ebcb24646f8c5bfad2866892d3f3cff05514452 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 7 May 2024 11:15:45 -0400 Subject: [PATCH 12/15] pNFS/filelayout: fixup pNfs allocation modes Change left over allocation flags. Fixes: a245832aaa99 ("pNFS/files: Ensure pNFS allocation modes are consistent with nfsiod") Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index cc2ed4b5a4fd..85d2dc9bc212 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -875,7 +875,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, req->wb_bytes, IOMODE_READ, false, - GFP_KERNEL); + nfs_io_gfp_mask()); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -899,7 +899,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, req->wb_bytes, IOMODE_RW, false, - GFP_NOFS); + nfs_io_gfp_mask()); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; From 523412b904c4ad2e9649d536bde0e6c75c88638b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 7 May 2024 15:59:33 -0400 Subject: [PATCH 13/15] pNFS/filelayout: check layout segment range Before doing the IO, check that we have the layout covering the range of IO. Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 85d2dc9bc212..bf3ba2e98f33 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -868,6 +868,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), @@ -892,6 +893,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), From a01b077a8743b2ed329c587cf4bbe49682da243f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 10 May 2024 15:07:43 -0400 Subject: [PATCH 14/15] pNFS: rework pnfs_generic_pg_check_layout to check IO range All callers of pnfs_generic_pg_check_layout() also want to do a call to check that the layout's range covers the IO range. Merge the functionality of the pnfs_generic_pg_check_range() into that of pnfs_generic_pg_check_layout(). Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 6 ++---- fs/nfs/flexfilelayout/flexfilelayout.c | 12 ++--------- fs/nfs/pnfs.c | 29 ++++++++------------------ fs/nfs/pnfs.h | 3 +-- 4 files changed, 14 insertions(+), 36 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index bf3ba2e98f33..29d84dc66ca3 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -867,8 +867,7 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), @@ -892,8 +891,7 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3e724cb7ef01..24188af56d5b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -822,14 +822,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, } } -static void -ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); -} - static void ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) @@ -840,7 +832,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, u32 ds_idx; retry: - ff_layout_pg_check_layout(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); /* Use full layout for now */ if (!pgio->pg_lseg) { ff_layout_pg_get_read(pgio, req, false); @@ -895,7 +887,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, u32 i; retry: - ff_layout_pg_check_layout(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a5cc6199127f..b5834728f31b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2705,38 +2705,28 @@ pnfs_layout_return_unused_byclid(struct nfs_client *clp, &range); } +/* Check if we have we have a valid layout but if there isn't an intersection + * between the request and the pgio->pg_lseg, put this pgio->pg_lseg away. + */ void -pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) +pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) { if (pgio->pg_lseg == NULL || - test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags)) + (test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags) && + pnfs_lseg_request_intersecting(pgio->pg_lseg, req))) return; pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); -/* - * Check for any intersection between the request and the pgio->pg_lseg, - * and if none, put this pgio->pg_lseg away. - */ -void -pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; - } -} -EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range); - void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { u64 rd_size; - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (pgio->pg_lseg == NULL) { if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); @@ -2766,8 +2756,7 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index db57a85500ee..fa5beeaaf5da 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -257,8 +257,7 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); -void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio); -void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req); +void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, From 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Fri, 10 May 2024 23:24:04 +0300 Subject: [PATCH 15/15] nfs: fix undefined behavior in nfs_block_bits() Shifting *signed int* typed constant 1 left by 31 bits causes undefined behavior. Specify the correct *unsigned long* type by using 1UL instead. Found by Linux Verification Center (linuxtesting.org) with the Svace static analysis tool. Cc: stable@vger.kernel.org Signed-off-by: Sergey Shtylyov Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index dc0693b3d214..9f0f4534744b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -717,9 +717,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) if ((bsize & (bsize - 1)) || nrbitsp) { unsigned char nrbits; - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--) ; - bsize = 1 << nrbits; + bsize = 1UL << nrbits; if (nrbitsp) *nrbitsp = nrbits; }