From 9947e57b22ddfb6f697fa45ef5c92d2aa17b2edf Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 15 Jun 2022 04:20:02 -0400 Subject: [PATCH 01/27] SUNRPC: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of ida_simple_get()/ida_simple_remove(). The latter is deprecated and more verbose. Signed-off-by: Bo Liu Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprt.c | 4 ++-- net/sunrpc/xprtmultipath.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c284efa3d1ef..4d8665f15dd7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -345,7 +345,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; - clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL); + clid = ida_alloc(&rpc_clids, GFP_KERNEL); if (clid < 0) return clid; clnt->cl_clid = clid; @@ -354,7 +354,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt) static void rpc_free_clid(struct rpc_clnt *clnt) { - ida_simple_remove(&rpc_clids, clnt->cl_clid); + ida_free(&rpc_clids, clnt->cl_clid); } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f8fae7815649..a50febadb37e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1788,7 +1788,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt) { int id; - id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL); + id = ida_alloc(&rpc_xprt_ids, GFP_KERNEL); if (id < 0) return id; @@ -1798,7 +1798,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt) static void xprt_free_id(struct rpc_xprt *xprt) { - ida_simple_remove(&rpc_xprt_ids, xprt->id); + ida_free(&rpc_xprt_ids, xprt->id); } struct rpc_xprt *xprt_alloc(struct net *net, size_t size, diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 685db598acbe..701250b305db 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -103,7 +103,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) { int id; - id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags); + id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags); if (id < 0) return id; @@ -113,7 +113,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) static void xprt_switch_free_id(struct rpc_xprt_switch *xps) { - ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id); + ida_free(&rpc_xprtswitch_ids, xps->xps_id); } /** From 724e2df95b08a7e6ca989a9f96b29dc92ece9cd9 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 15 Jun 2022 02:27:45 -0400 Subject: [PATCH 02/27] NFSv4: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of ida_simple_get()/ida_simple_remove(). The latter is deprecated and more verbose. Signed-off-by: Bo Liu Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9bab3e9c702a..beb9448df515 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -497,8 +497,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; - sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0, - gfp_flags); + sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags); if (sp->so_seqid.owner_id < 0) { kfree(sp); return NULL; @@ -534,7 +533,7 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { nfs4_destroy_seqid_counter(&sp->so_seqid); put_cred(sp->so_cred); - ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id); + ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id); kfree(sp); } @@ -877,8 +876,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f refcount_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner = fl_owner; - lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, - 0, 0, GFP_KERNEL_ACCOUNT); + lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT); if (lsp->ls_seqid.owner_id < 0) goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); @@ -890,7 +888,7 @@ out_free: void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id); + ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id); nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); } From d6abc719a213b8c409789799786e11d203adb3b0 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Tue, 7 Jun 2022 15:32:01 +0800 Subject: [PATCH 03/27] SUNRPC: use max_t() to simplify open code Use max_t() to simplify open code which uses "if...else" to get maximum of two values. Generated by coccinelle script: scripts/coccinelle/misc/minmax.cocci Signed-off-by: Ziyang Xuan Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a50febadb37e..71dc26373444 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1822,10 +1822,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, goto out_free; list_add(&req->rq_list, &xprt->free); } - if (max_alloc > num_prealloc) - xprt->max_reqs = max_alloc; - else - xprt->max_reqs = num_prealloc; + xprt->max_reqs = max_t(unsigned int, max_alloc, num_prealloc); xprt->min_reqs = num_prealloc; xprt->num_reqs = num_prealloc; From 7e7ce2ccbae746a88e21b4ce94dbf372b31c152c Mon Sep 17 00:00:00 2001 From: yuzhe Date: Wed, 15 Jun 2022 13:39:24 +0800 Subject: [PATCH 04/27] nfs: remove unnecessary (void*) conversions. remove unnecessary void* type castings. Signed-off-by: yuzhe Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 6 +++--- fs/nfs/nfs42xattr.c | 2 +- fs/nfs/nfs4idmap.c | 2 +- fs/nfs/nfs4proc.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bea7c005119c..a87e529065f9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -318,7 +318,7 @@ struct nfs_find_desc { static int nfs_find_actor(struct inode *inode, void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_find_desc *desc = opaque; struct nfs_fh *fh = desc->fh; struct nfs_fattr *fattr = desc->fattr; @@ -336,7 +336,7 @@ nfs_find_actor(struct inode *inode, void *opaque) static int nfs_init_locked(struct inode *inode, void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_find_desc *desc = opaque; struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); @@ -2271,7 +2271,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) static void init_once(void *foo) { - struct nfs_inode *nfsi = (struct nfs_inode *) foo; + struct nfs_inode *nfsi = foo; inode_init_once(&nfsi->vfs_inode); INIT_LIST_HEAD(&nfsi->open_files); diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index a9bf09fdf2c3..76ae11834206 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -981,7 +981,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) static void nfs4_xattr_cache_init_once(void *p) { - struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p; + struct nfs4_xattr_cache *cache = p; spin_lock_init(&cache->listxattr_lock); atomic_long_set(&cache->nent, 0); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index ec6afd3c4bca..e3fdd2f45b01 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -583,7 +583,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) struct request_key_auth *rka = get_request_key_auth(authkey); struct rpc_pipe_msg *msg; struct idmap_msg *im; - struct idmap *idmap = (struct idmap *)aux; + struct idmap *idmap = aux; struct key *key = rka->target_key; int ret = -ENOKEY; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3ed14a2a84a4..17362ba94592 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6607,7 +6607,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) struct nfs4_delegreturndata *d_data; struct pnfs_layout_hdr *lo; - d_data = (struct nfs4_delegreturndata *)data; + d_data = data; if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) { nfs4_sequence_done(task, &d_data->res.seq_res); @@ -8900,7 +8900,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred) void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { - struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data; + struct nfs4_add_xprt_data *adata = data; struct rpc_task *task; int status; From 384edeb46f07f4ee1b3adda9416e724421e2fad5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 10 Aug 2022 13:40:01 +0200 Subject: [PATCH 05/27] NFS: clean up a needless assignment in nfs_file_write() Commit 064109db53ec ("NFS: remove redundant code in nfs_file_write()") identifies that filemap_fdatawait_range() will always return 0 and removes a dead error-handling case in nfs_file_write(). With this change however, assigning the return of filemap_fdatawait_range() to the result variable is a dead store. Remove this needless assignment. No functional change. No change in object code. Signed-off-by: Lukas Bulwahn Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e032fe201a36..7a4d2fe9c0b3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -655,9 +655,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) goto out; } if (mntflags & NFS_MOUNT_WRITE_WAIT) { - result = filemap_fdatawait_range(file->f_mapping, - iocb->ki_pos - written, - iocb->ki_pos - 1); + filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } result = generic_write_sync(iocb, written); if (result < 0) From 15bcdc92d108b3bd85a44d7712496c89cf3ffddd Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:02:29 +0200 Subject: [PATCH 06/27] SUNRPC: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e976007f4fd0..b3341c202ea0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -261,7 +261,7 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_LOCAL: sun = xs_addr_un(xprt); - strlcpy(buf, sun->sun_path, sizeof(buf)); + strscpy(buf, sun->sun_path, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); break; From 0dd7439f382518e9997cfa7ca9d06799dbeb33fa Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:15 +0200 Subject: [PATCH 07/27] NFS: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 2 +- fs/nfs/nfsroot.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 3c5678aec006..7a5162afa5c0 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -254,7 +254,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) goto error; ip_addr = (const char *)buf; } - strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + strscpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); err = nfs_idmap_new(clp); if (err < 0) { diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index fa148308822c..620329b7e6ae 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -139,7 +139,7 @@ static int __init nfs_root_setup(char *line) ROOT_DEV = Root_NFS; if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { - strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms)); + strscpy(nfs_root_parms, line, sizeof(nfs_root_parms)); } else { size_t n = strlen(line) + sizeof(NFS_ROOT) - 1; if (n >= sizeof(nfs_root_parms)) From 90377158bd2d2acd20e6131e84c234d715b7aa42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2022 16:56:48 -0400 Subject: [PATCH 08/27] NFSv4/pNFS: Always return layout stats on layout return for flexfiles We want to ensure that the server never misses the layout stats when we're closing the file, so that it knows whether or not to update its internal state. Otherwise, if we were racing with a layout stat, we might cause the server to invalidate its layout before the layout stat got processed. Fixes: 06946c6a3d8b ("pNFS/flexfiles: Only send layoutstats updates for mirrors that were updated") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d285561e59f..1443330ae998 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -30,14 +30,20 @@ #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) #define FF_LAYOUTRETURN_MAXERR 20 +enum nfs4_ff_op_type { + NFS4_FF_OP_LAYOUTSTATS, + NFS4_FF_OP_LAYOUTRETURN, +}; + static unsigned short io_maxretrans; static const struct pnfs_commit_ops ff_layout_commit_ops; static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr); -static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, +static int +ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, struct nfs42_layoutstat_devinfo *devinfo, - int dev_limit); + int dev_limit, enum nfs4_ff_op_type type); static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, const struct nfs42_layoutstat_devinfo *devinfo, struct nfs4_ff_layout_mirror *mirror); @@ -2161,8 +2167,9 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) FF_LAYOUTRETURN_MAXERR); spin_lock(&args->inode->i_lock); - ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, - &ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo)); + ff_args->num_dev = ff_layout_mirror_prepare_stats( + &ff_layout->generic_hdr, &ff_args->devinfo[0], + ARRAY_SIZE(ff_args->devinfo), NFS4_FF_OP_LAYOUTRETURN); spin_unlock(&args->inode->i_lock); args->ld_private->ops = &layoutreturn_ops; @@ -2396,7 +2403,7 @@ static const struct nfs4_xdr_opaque_ops layoutstat_ops = { static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, struct nfs42_layoutstat_devinfo *devinfo, - int dev_limit) + int dev_limit, enum nfs4_ff_op_type type) { struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo); struct nfs4_ff_layout_mirror *mirror; @@ -2408,7 +2415,9 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, break; if (IS_ERR_OR_NULL(mirror->mirror_ds)) continue; - if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) + if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, + &mirror->flags) && + type != NFS4_FF_OP_LAYOUTRETURN) continue; /* mirror refcount put in cleanup_layoutstats */ if (!refcount_inc_not_zero(&mirror->ref)) @@ -2448,7 +2457,9 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) spin_lock(&args->inode->i_lock); ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, - &args->devinfo[0], dev_count); + &args->devinfo[0], + dev_count, + NFS4_FF_OP_LAYOUTSTATS); spin_unlock(&args->inode->i_lock); if (!args->num_dev) { kfree(args->devinfo); From 74fd2ca0f6af9cc332957b2e6ef70772e421403a Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Tue, 6 Sep 2022 10:41:19 +0800 Subject: [PATCH 09/27] fs/nfs/pnfs_nfs.c: fix spelling typo and syntax error in comment Fix spelling typo and syntax error in comment. Suggested-by: Randy Dunlap Reported-by: k2ci Signed-off-by: Jiangshan Yi Reviewed-by: Randy Dunlap Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 657c242a18ff..987c88ddeaf0 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -374,12 +374,12 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, return NULL; } -/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head reqest +/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request * for @page * @cinfo - commit info for current inode * @page - page to search for matching head request * - * Returns a the head request if one is found, otherwise returns NULL. + * Return: the head request if one is found, otherwise %NULL. */ struct nfs_page * pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) From 8aa7cf85248f7b1fb49a1117c60a160b5b22b337 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 9 Sep 2022 14:46:40 +0800 Subject: [PATCH 10/27] NFSv4: remove nfs4_renewd_prepare_shutdown() declaration nfs4_renewd_prepare_shutdown() has been removed since commit 3050141bae57 ("NFSv4: Kill nfs4_renewd_prepare_shutdown()"), so remove it. Signed-off-by: Gaosheng Cui Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 79df6e83881b..400a71e75238 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -459,7 +459,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *); /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs_client *); -extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease); From a035618caf8718a1d4e840ec39dfc5fce0dcdee1 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 9 Sep 2022 14:24:11 +0800 Subject: [PATCH 11/27] nfs: remove nfs_wait_atomic_killable() and nfs_write_prepare() declaration nfs_write_prepare() has been removed since commit a4cdda59111f ("NFS: Create a common pgio_rpc_prepare function"), so remove it. nfs_wait_atomic_killable() has been removed since commit 723c921e7dfc ("sched/wait, fs/nfs: Convert wait_on_atomic_t() usage to the new wait_var_event() API"), so remove it. Signed-off-by: Gaosheng Cui Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 898dd95bc7a7..d914d609b85b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -435,7 +435,6 @@ extern void nfs_zap_acl_cache(struct inode *inode); extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); -extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; @@ -503,7 +502,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_commit_free(struct nfs_commit_data *p); -extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, From 963694615d9d5a860e6571da18e50f14ab3583b3 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 21 Sep 2022 13:21:52 -0400 Subject: [PATCH 12/27] NFSv4.2: Add special handling for LISTXATTR receiving NFS4ERR_NOXATTR We can translate this into an empty response list instead of passing an error up to userspace. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index b56f05113d36..fe1aeb0f048f 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -569,6 +569,14 @@ static int decode_listxattrs(struct xdr_stream *xdr, */ if (status == -ETOOSMALL) status = -ERANGE; + /* + * Special case: for LISTXATTRS, NFS4ERR_NOXATTR + * should be translated to success with zero-length reply. + */ + if (status == -ENODATA) { + res->eof = true; + status = 0; + } goto out; } From 3a100e4d8a2f7660d220c000364fe57679da9c92 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 21 Sep 2022 16:29:57 -0400 Subject: [PATCH 13/27] NFSv4.2: Move TRACE_DEFINE_ENUM(NFS4_CONTENT_*) under CONFIG_NFS_V4_2 NFS4_CONTENT_DATA and NFS4_CONTENT_HOLE both only exist under NFS v4.2. Move their corresponding TRACE_DEFINE_ENUM calls under this Kconfig option. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 6ee6ad3674a2..37c4c105ed29 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2097,6 +2097,7 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +#ifdef CONFIG_NFS_V4_2 TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA); TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); @@ -2105,7 +2106,6 @@ TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); { NFS4_CONTENT_DATA, "DATA" }, \ { NFS4_CONTENT_HOLE, "HOLE" }) -#ifdef CONFIG_NFS_V4_2 TRACE_EVENT(nfs4_llseek, TP_PROTO( const struct inode *inode, From 27ffed1040f7703e368f37f5f97fef87a79527dd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 22 Sep 2022 15:18:50 -0400 Subject: [PATCH 14/27] NFSv4.2: Add tracepoints for getxattr, setxattr, and removexattr These functions take similar arguments, and can share a tracepoint class for common formatting. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 3 +++ fs/nfs/nfs4trace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6dab9e408372..c4791ca00df1 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1175,6 +1175,7 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name) ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + trace_nfs4_removexattr(inode, name, ret); if (!ret) nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); @@ -1214,6 +1215,7 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); + trace_nfs4_setxattr(inode, name, ret); for (; np > 0; np--) put_page(pages[np - 1]); @@ -1246,6 +1248,7 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); + trace_nfs4_getxattr(inode, name, ret); if (ret < 0) return ret; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 37c4c105ed29..650c9353826f 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2496,6 +2496,52 @@ TRACE_EVENT(nfs4_offload_cancel, __entry->stateid_seq, __entry->stateid_hash ) ); + +DECLARE_EVENT_CLASS(nfs4_xattr_event, + TP_PROTO( + const struct inode *inode, + const char *name, + int error + ), + + TP_ARGS(inode, name, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __string(name, name) + ), + + TP_fast_assign( + __entry->error = error < 0 ? -error : 0; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __assign_str(name, name); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "name=%s", + -__entry->error, show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __get_str(name) + ) +); +#define DEFINE_NFS4_XATTR_EVENT(name) \ + DEFINE_EVENT(nfs4_xattr_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + const char *name, \ + int error \ + ), \ + TP_ARGS(inode, name, error)) +DEFINE_NFS4_XATTR_EVENT(nfs4_getxattr); +DEFINE_NFS4_XATTR_EVENT(nfs4_setxattr); +DEFINE_NFS4_XATTR_EVENT(nfs4_removexattr); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From a0b685e7bd7c5d232a64b0707d2f83ae3e1840dc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 3 Oct 2022 13:03:52 -0400 Subject: [PATCH 15/27] NFSv4.2: Add a tracepoint for listxattr This can be defined as simply an NFS4_INODE_EVENT() since we don't have the name of a specific xattr to list. This roughly matches readdir, which also uses an NFS4_INODE_EVENT() tracepoint. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index c4791ca00df1..ced9170701b6 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1320,6 +1320,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); + trace_nfs4_listxattr(inode, ret); if (ret >= 0) { ret = res.copied; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 650c9353826f..2cff5901c689 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2542,6 +2542,8 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event, DEFINE_NFS4_XATTR_EVENT(nfs4_getxattr); DEFINE_NFS4_XATTR_EVENT(nfs4_setxattr); DEFINE_NFS4_XATTR_EVENT(nfs4_removexattr); + +DEFINE_NFS4_INODE_EVENT(nfs4_listxattr); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From 6b1eb3b22272713b5153deba812b6e3943ddd683 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 18 Sep 2022 13:28:16 -0400 Subject: [PATCH 16/27] SUNRPC: Replace the use of the xprtiod WQ in rpcrdma While setting up a new lab, I accidentally misconfigured the Ethernet port for a system that tried an NFS mount using RoCE. This made the NFS server unreachable. The following WARNING popped on the NFS client while waiting for the mount attempt to time out: kernel: workqueue: WQ_MEM_RECLAIM xprtiod:xprt_rdma_connect_worker [rpcrdma] is flushing !WQ_MEM_RECLAI> kernel: WARNING: CPU: 0 PID: 100 at kernel/workqueue.c:2628 check_flush_dependency+0xbf/0xca kernel: Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfs 8021q garp stp mrp llc rfkill rpcrdma> kernel: CPU: 0 PID: 100 Comm: kworker/u8:8 Not tainted 6.0.0-rc1-00002-g6229f8c054e5 #13 kernel: Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b 06/12/2017 kernel: Workqueue: xprtiod xprt_rdma_connect_worker [rpcrdma] kernel: RIP: 0010:check_flush_dependency+0xbf/0xca kernel: Code: 75 2a 48 8b 55 18 48 8d 8b b0 00 00 00 4d 89 e0 48 81 c6 b0 00 00 00 48 c7 c7 65 33 2e be> kernel: RSP: 0018:ffffb562806cfcf8 EFLAGS: 00010092 kernel: RAX: 0000000000000082 RBX: ffff97894f8c3c00 RCX: 0000000000000027 kernel: RDX: 0000000000000002 RSI: ffffffffbe3447d1 RDI: 00000000ffffffff kernel: RBP: ffff978941315840 R08: 0000000000000000 R09: 0000000000000000 kernel: R10: 00000000000008b0 R11: 0000000000000001 R12: ffffffffc0ce3731 kernel: R13: ffff978950c00500 R14: ffff97894341f0c0 R15: ffff978951112eb0 kernel: FS: 0000000000000000(0000) GS:ffff97987fc00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007f807535eae8 CR3: 000000010b8e4002 CR4: 00000000003706f0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: Call Trace: kernel: kernel: __flush_work.isra.0+0xaf/0x188 kernel: ? _raw_spin_lock_irqsave+0x2c/0x37 kernel: ? lock_timer_base+0x38/0x5f kernel: __cancel_work_timer+0xea/0x13d kernel: ? preempt_latency_start+0x2b/0x46 kernel: rdma_addr_cancel+0x70/0x81 [ib_core] kernel: _destroy_id+0x1a/0x246 [rdma_cm] kernel: rpcrdma_xprt_connect+0x115/0x5ae [rpcrdma] kernel: ? _raw_spin_unlock+0x14/0x29 kernel: ? raw_spin_rq_unlock_irq+0x5/0x10 kernel: ? finish_task_switch.isra.0+0x171/0x249 kernel: xprt_rdma_connect_worker+0x3b/0xc7 [rpcrdma] kernel: process_one_work+0x1d8/0x2d4 kernel: worker_thread+0x18b/0x24f kernel: ? rescuer_thread+0x280/0x280 kernel: kthread+0xf4/0xfc kernel: ? kthread_complete_and_exit+0x1b/0x1b kernel: ret_from_fork+0x22/0x30 kernel: SUNRPC's xprtiod workqueue is WQ_MEM_RECLAIM, so any workqueue that one of its work items tries to cancel has to be WQ_MEM_RECLAIM to prevent a priority inversion. The internal workqueues in the RDMA/core are currently non-MEM_RECLAIM. Jason Gunthorpe says this about the current state of RDMA/core: > If you attempt to do a reconnection/etc from within a RECLAIM > context it will deadlock on one of the many allocations that are > made to support opening the connection. > > The general idea of reclaim is that the entire task context > working under the reclaim is marked with an override of the gfp > flags to make all allocations under that call chain reclaim safe. > > But rdmacm does allocations outside this, eg in the WQs processing > the CM packets. So this doesn't work and we will deadlock. > > Fixing it is a big deal and needs more than poking WQ_MEM_RECLAIM > here and there. So we will change the ULP in this case to avoid the use of WQ_MEM_RECLAIM where possible. Deadlocks that were possible before are not fixed, but at least we no longer have a false sense of confidence that the stack won't allocate memory during memory reclaim. Suggested-by: Leon Romanovsky Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 3 +-- net/sunrpc/xprtrdma/verbs.c | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index bcb37b51adf6..10bb2b929c6d 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -494,8 +494,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } trace_xprtrdma_op_connect(r_xprt, delay); - queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, - delay); + queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay); } /** diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2fbe9aaeec34..049c854b7b37 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -791,13 +791,9 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) /* If there is no underlying connection, it's no use * to wake the refresh worker. */ - if (ep->re_connect_status == 1) { - /* The work is scheduled on a WQ_MEM_RECLAIM - * workqueue in order to prevent MR allocation - * from recursing into NFS during direct reclaim. - */ - queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); - } + if (ep->re_connect_status != 1) + return; + queue_work(system_highpri_wq, &buf->rb_refresh_worker); } /** From 5014831264b05be11090668ae2211e64a1765f7e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Sep 2022 09:06:05 -0400 Subject: [PATCH 17/27] svcrdma: Clean up RPCRDMA_DEF_GFP xprt_rdma_bc_allocate() is now the only user of RPCRDMA_DEF_GFP. Replace that macro with the raw flags. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 4 ++-- net/sunrpc/xprtrdma/xprt_rdma.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 85c8cdda98b1..aa2227a7e552 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -119,12 +119,12 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } - page = alloc_page(RPCRDMA_DEF_GFP); + page = alloc_page(GFP_NOIO | __GFP_NOWARN); if (!page) return -ENOMEM; rqst->rq_buffer = page_address(page); - rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP); + rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, GFP_NOIO | __GFP_NOWARN); if (!rqst->rq_rbuffer) { put_page(page); return -ENOMEM; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c79f92eeda76..84b685c45555 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -149,8 +149,6 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb) return rb->rg_data; } -#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) - /* To ensure a transport can always make forward progress, * the number of RDMA segments allowed in header chunk lists * is capped at 16. This prevents less-capable devices from From 3b50cc1c7f2170f2eb0fec040b6c3a8574026fce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Sep 2022 09:06:11 -0400 Subject: [PATCH 18/27] xprtrdma: Clean up synopsis of rpcrdma_req_create() Commit 1769e6a816df ("xprtrdma: Clean up rpcrdma_create_req()") added rpcrdma_req_create() with a GFP flags argument in case a caller might want to avoid waiting for memory. There has never been a caller that does not pass GFP_KERNEL as the third argument. That argument can therefore be eliminated. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 16 ++++++++-------- net/sunrpc/xprtrdma/xprt_rdma.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index faba7136dd9a..e4d84a13c566 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -189,7 +189,7 @@ create_req: return NULL; size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE); - req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, size); if (!req) return NULL; if (rpcrdma_req_setup(r_xprt, req)) { diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 049c854b7b37..89f5444f4d41 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -800,25 +800,25 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) * rpcrdma_req_create - Allocate an rpcrdma_req object * @r_xprt: controlling r_xprt * @size: initial size, in bytes, of send and receive buffers - * @flags: GFP flags passed to memory allocators * * Returns an allocated and fully initialized rpcrdma_req or NULL. */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, - gfp_t flags) +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + size_t size) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_req *req; - req = kzalloc(sizeof(*req), flags); + req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) goto out1; - req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, + GFP_KERNEL); if (!req->rl_sendbuf) goto out2; - req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, GFP_KERNEL); if (!req->rl_recvbuf) goto out3; @@ -1060,8 +1060,8 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { struct rpcrdma_req *req; - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, - GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, + RPCRDMA_V1_DEF_INLINE_SIZE * 2); if (!req) goto out; list_add(&req->rl_list, &buf->rb_send_bufs); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 84b685c45555..227dce50cc4b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -465,8 +465,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp); /* * Buffer calls - xprtrdma/verbs.c */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, - gfp_t flags); +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + size_t size); int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); From 7ac1879875fffa8f7acfe8b8d6932a039f2b736d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Sep 2022 09:06:18 -0400 Subject: [PATCH 19/27] xprtrdma: Clean up synopsis of rpcrdma_regbuf_alloc() Currently all rpcrdma_regbuf_alloc() call sites pass the same value as their third argument. That argument can therefore be eliminated. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 89f5444f4d41..8fb10fc72f69 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -76,8 +76,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_ep_get(struct rpcrdma_ep *ep); static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, - gfp_t flags); +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); @@ -813,12 +812,11 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, if (req == NULL) goto out1; - req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, - GFP_KERNEL); + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE); if (!req->rl_sendbuf) goto out2; - req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, GFP_KERNEL); + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE); if (!req->rl_recvbuf) goto out3; @@ -854,7 +852,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; maxhdrsize *= sizeof(__be32); rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), - DMA_TO_DEVICE, GFP_KERNEL); + DMA_TO_DEVICE); if (!rb) goto out; @@ -930,7 +928,7 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, goto out; rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, - DMA_FROM_DEVICE, GFP_KERNEL); + DMA_FROM_DEVICE); if (!rep->rr_rdmabuf) goto out_free; @@ -1231,15 +1229,14 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) * or Replies they may be registered externally via frwr_map. */ static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, - gfp_t flags) +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) { struct rpcrdma_regbuf *rb; - rb = kmalloc(sizeof(*rb), flags); + rb = kmalloc(sizeof(*rb), GFP_KERNEL); if (!rb) return NULL; - rb->rg_data = kmalloc(size, flags); + rb->rg_data = kmalloc(size, GFP_KERNEL); if (!rb->rg_data) { kfree(rb); return NULL; From 2d77058cce9fbff3d69cc05d4eb695f4ff421c03 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Sep 2022 09:06:24 -0400 Subject: [PATCH 20/27] xprtrdma: MR-related memory allocation should be allowed to fail xprtrdma always drives a retry of MR allocation if it should fail. It should be safe to not use GFP_KERNEL for this purpose rather than sleeping in the memory allocator. In theory, if these weaker allocations are attempted first, memory exhaustion is likely to cause xprtrdma to fail fast and not then invoke the RDMA core APIs, which still might use GFP_KERNEL. Also note that rpc_task_gfp_mask() always sets __GFP_NORETRY and __GFP_NOWARN when an RPC-related allocation is being done in a worker thread. MR allocation is already always done in worker threads. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 17 +++++++---------- net/sunrpc/xprtrdma/verbs.c | 5 ++++- net/sunrpc/xprtrdma/xprt_rdma.h | 6 ++++++ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index de0bdb6b729f..ce55361a822f 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -126,14 +126,15 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) struct ib_mr *frmr; int rc; + sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS, + ibdev_to_node(ep->re_id->device)); + if (!sg) + return -ENOMEM; + frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth); if (IS_ERR(frmr)) goto out_mr_err; - sg = kmalloc_array(depth, sizeof(*sg), GFP_KERNEL); - if (!sg) - goto out_list_err; - mr->mr_xprt = r_xprt; mr->mr_ibmr = frmr; mr->mr_device = NULL; @@ -146,13 +147,9 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) return 0; out_mr_err: - rc = PTR_ERR(frmr); + kfree(sg); trace_xprtrdma_frwr_alloc(mr, rc); - return rc; - -out_list_err: - ib_dereg_mr(frmr); - return -ENOMEM; + return PTR_ERR(frmr); } /** diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8fb10fc72f69..4a7b87e9e47c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -739,13 +739,16 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct ib_device *device = ep->re_id->device; unsigned int count; + /* Try to allocate enough to perform one full-sized I/O */ for (count = 0; count < ep->re_max_rdma_segs; count++) { struct rpcrdma_mr *mr; int rc; - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS, + ibdev_to_node(device)); if (!mr) break; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 227dce50cc4b..5e5ff6784ef5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -149,6 +149,12 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb) return rb->rg_data; } +/* Do not use emergency memory reserves, and fail quickly if memory + * cannot be allocated easily. These flags may be used wherever there + * is robust logic to handle a failure to allocate. + */ +#define XPRTRDMA_GFP_FLAGS (__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) + /* To ensure a transport can always make forward progress, * the number of RDMA segments allowed in header chunk lists * is capped at 16. This prevents less-capable devices from From 9c8f332fbf995dc1d4d30a973d7ad6e1adb56437 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Sep 2022 09:06:30 -0400 Subject: [PATCH 21/27] xprtrdma: Memory allocation should be allowed to fail during connect An attempt to establish a connection can always fail and then be retried. GFP_KERNEL allocation is not necessary here. Like MR allocation, establishing a connection is always done in a worker thread. The new GFP flags align with the flags that would be returned by rpc_task_gfp_mask() in this case. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4a7b87e9e47c..7ca58cb65e27 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -372,7 +372,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep; int rc; - ep = kzalloc(sizeof(*ep), GFP_KERNEL); + ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS); if (!ep) return -ENOTCONN; ep->re_xprt = &r_xprt->rx_xprt; @@ -605,7 +605,7 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) struct rpcrdma_sendctx *sc; sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), - GFP_KERNEL); + XPRTRDMA_GFP_FLAGS); if (!sc) return NULL; @@ -628,7 +628,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * Sends are posted. */ i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; - buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); + buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS); if (!buf->rb_sc_ctxs) return -ENOMEM; From f20f18c95630e9b53f5081fd5df3bb705c450bbe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Sep 2022 09:06:37 -0400 Subject: [PATCH 22/27] xprtrdma: Prevent memory allocations from driving a reclaim Many memory allocations that xprtrdma does can fail safely. Let's use this fact to avoid some potential deadlocks: Replace GFP_KERNEL with GFP flags that do not try hard to acquire memory. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7ca58cb65e27..44b87e4274b4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -811,7 +811,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_req *req; - req = kzalloc(sizeof(*req), GFP_KERNEL); + req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS); if (req == NULL) goto out1; @@ -926,7 +926,7 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - rep = kzalloc(sizeof(*rep), GFP_KERNEL); + rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); if (rep == NULL) goto out; @@ -1236,10 +1236,10 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) { struct rpcrdma_regbuf *rb; - rb = kmalloc(sizeof(*rb), GFP_KERNEL); + rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); if (!rb) return NULL; - rb->rg_data = kmalloc(size, GFP_KERNEL); + rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); if (!rb->rg_data) { kfree(rb); return NULL; From e4266f23ecdf0d3d1f1d9e8fff730e1f962b0687 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Sep 2022 09:00:48 -0400 Subject: [PATCH 23/27] xprtrdma: Fix uninitialized variable net/sunrpc/xprtrdma/frwr_ops.c:151:32: warning: variable 'rc' is uninitialized when used here [-Wuninitialized] trace_xprtrdma_frwr_alloc(mr, rc); ^~ net/sunrpc/xprtrdma/frwr_ops.c:127:8: note: initialize the variable 'rc' to silence this warning int rc; ^ = 0 1 warning generated. The tracepoint is intended to record the error returned from ib_alloc_mr(). In the current code there is no other purpose for @rc, so simply replace it. Reported-by: kernel test robot Fixes: d8cf39a280c3b0 ('xprtrdma: MR-related memory allocation should be allowed to fail') Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ce55361a822f..ffbf99894970 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -124,7 +124,6 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) unsigned int depth = ep->re_max_fr_depth; struct scatterlist *sg; struct ib_mr *frmr; - int rc; sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS, ibdev_to_node(ep->re_id->device)); @@ -148,7 +147,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) out_mr_err: kfree(sg); - trace_xprtrdma_frwr_alloc(mr, rc); + trace_xprtrdma_frwr_alloc(mr, PTR_ERR(frmr)); return PTR_ERR(frmr); } From 39494194f93bed7926d4b3bd03a6a76ba23e612b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Oct 2022 15:57:35 -0400 Subject: [PATCH 24/27] SUNRPC: Fix races with rpc_killall_tasks() Ensure that we immediately call rpc_exit_task() after waking up, and that the tk_rpc_status cannot get clobbered by some other function. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 6 ++---- net/sunrpc/sched.c | 40 ++++++++++++++++++++++-------------- net/sunrpc/xprtsock.c | 3 +-- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index acc62647317c..647247040ef9 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -209,6 +209,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4d8665f15dd7..a8c341e43510 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1642,7 +1642,7 @@ static void __rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) { trace_rpc_call_rpcerror(task, tk_status, rpc_status); - task->tk_rpc_status = rpc_status; + rpc_task_set_rpc_status(task, rpc_status); rpc_exit(task, tk_status); } @@ -2435,10 +2435,8 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - if (RPC_SIGNALLED(task)) { - rpc_call_rpcerror(task, -ERESTARTSYS); + if (RPC_SIGNALLED(task)) return; - } if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 25b9221950ff..f388bfaf6ff0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -65,6 +65,13 @@ gfp_t rpc_task_gfp_mask(void) } EXPORT_SYMBOL_GPL(rpc_task_gfp_mask); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status) +{ + if (cmpxchg(&task->tk_rpc_status, 0, rpc_status) == 0) + return true; + return false; +} + unsigned long rpc_task_timeout(const struct rpc_task *task) { @@ -855,12 +862,14 @@ void rpc_signal_task(struct rpc_task *task) if (!RPC_IS_ACTIVATED(task)) return; + if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) + return; trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); if (queue) - rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS); + rpc_wake_up_queued_task(queue, task); } void rpc_exit(struct rpc_task *task, int status) @@ -907,10 +916,16 @@ static void __rpc_execute(struct rpc_task *task) * Perform the next FSM step or a pending callback. * * tk_action may be NULL if the task has been killed. - * In particular, note that rpc_killall_tasks may - * do this at any time, so beware when dereferencing. */ do_action = task->tk_action; + /* Tasks with an RPC error status should exit */ + if (do_action != rpc_exit_task && + (status = READ_ONCE(task->tk_rpc_status)) != 0) { + task->tk_status = status; + if (do_action != NULL) + do_action = rpc_exit_task; + } + /* Callbacks override all actions */ if (task->tk_callback) { do_action = task->tk_callback; task->tk_callback = NULL; @@ -932,14 +947,6 @@ static void __rpc_execute(struct rpc_task *task) continue; } - /* - * Signalled tasks should exit rather than sleep. - */ - if (RPC_SIGNALLED(task)) { - task->tk_rpc_status = -ERESTARTSYS; - rpc_exit(task, -ERESTARTSYS); - } - /* * The queue->lock protects against races with * rpc_make_runnable(). @@ -955,6 +962,12 @@ static void __rpc_execute(struct rpc_task *task) spin_unlock(&queue->lock); continue; } + /* Wake up any task that has an exit status */ + if (READ_ONCE(task->tk_rpc_status) != 0) { + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock(&queue->lock); + continue; + } rpc_clear_running(task); spin_unlock(&queue->lock); if (task_is_async) @@ -972,10 +985,7 @@ static void __rpc_execute(struct rpc_task *task) * clean up after sleeping on some queue, we don't * break the loop here, but go around once more. */ - trace_rpc_task_signalled(task, task->tk_action); - set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); - task->tk_rpc_status = -ERESTARTSYS; - rpc_exit(task, -ERESTARTSYS); + rpc_signal_task(task); } trace_rpc_task_sync_wake(task, task->tk_action); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b3341c202ea0..f34d5427b66c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1978,8 +1978,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) * we'll need to figure out how to pass a namespace to * connect. */ - task->tk_rpc_status = -ENOTCONN; - rpc_exit(task, -ENOTCONN); + rpc_task_set_rpc_status(task, -ENOTCONN); goto out_wake; } ret = xs_local_setup_socket(transport); From f8423909ecca208834a9d704e58409800f8b5f21 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Oct 2022 15:57:36 -0400 Subject: [PATCH 25/27] SUNRPC: Add a helper to allow pNFS drivers to selectively cancel RPC calls Add the helper rpc_cancel_tasks(), which uses a caller-defined selection function to define a set of in-flight RPC calls to cancel. This is mainly intended for pNFS drivers which are subject to a layout recall, and which may therefore want to cancel all pending I/O using that layout in order to redrive it after the layout recall has been satisfied. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 5 +++++ net/sunrpc/clnt.c | 37 ++++++++++++++++++++++++++++++++++++ net/sunrpc/sched.c | 11 +++++++++++ 3 files changed, 53 insertions(+) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 647247040ef9..cdcf0fe56a6f 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -210,11 +210,16 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); +void rpc_task_try_cancel(struct rpc_task *task, int error); void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a8c341e43510..57677517b474 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -873,6 +873,43 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_killall_tasks); +/** + * rpc_cancel_tasks - try to cancel a set of RPC tasks + * @clnt: Pointer to RPC client + * @error: RPC task error value to set + * @fnmatch: Pointer to selector function + * @data: User data + * + * Uses @fnmatch to define a set of RPC tasks that are to be cancelled. + * The argument @error must be a negative error value. + */ +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data) +{ + struct rpc_task *task; + unsigned long count = 0; + + if (list_empty(&clnt->cl_tasks)) + return 0; + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { + if (!RPC_IS_ACTIVATED(task)) + continue; + if (!fnmatch(task, data)) + continue; + rpc_task_try_cancel(task, error); + count++; + } + spin_unlock(&clnt->cl_lock); + return count; +} +EXPORT_SYMBOL_GPL(rpc_cancel_tasks); + /* * Properly shut down an RPC client, terminating all outstanding * requests. diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f388bfaf6ff0..de912e02371b 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -872,6 +872,17 @@ void rpc_signal_task(struct rpc_task *task) rpc_wake_up_queued_task(queue, task); } +void rpc_task_try_cancel(struct rpc_task *task, int error) +{ + struct rpc_wait_queue *queue; + + if (!rpc_task_set_rpc_status(task, error)) + return; + queue = READ_ONCE(task->tk_waitqueue); + if (queue) + rpc_wake_up_queued_task(queue, task); +} + void rpc_exit(struct rpc_task *task, int status) { task->tk_status = status; From dc4c4304855a5721d214e2a53e17df5152dd5f34 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Oct 2022 15:57:37 -0400 Subject: [PATCH 26/27] SUNRPC: Add API to force the client to disconnect Allow the caller to force a disconnection of the RPC client so that we can clear any pending requests that are buffered in the socket. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 75eea5ebb179..770ef2cb5775 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -246,6 +246,7 @@ void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); +void rpc_clnt_disconnect(struct rpc_clnt *clnt); void rpc_cleanup_clids(void); static inline int rpc_reply_expected(struct rpc_task *task) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 57677517b474..993acf38af87 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -910,6 +910,20 @@ unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, } EXPORT_SYMBOL_GPL(rpc_cancel_tasks); +static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, void *dummy) +{ + if (xprt_connected(xprt)) + xprt_force_disconnect(xprt); + return 0; +} + +void rpc_clnt_disconnect(struct rpc_clnt *clnt) +{ + rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_disconnect_xprt, NULL); +} +EXPORT_SYMBOL_GPL(rpc_clnt_disconnect); + /* * Properly shut down an RPC client, terminating all outstanding * requests. From b739a5bd9d9f18cc69dced8db128ef7206e000cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Oct 2022 15:57:38 -0400 Subject: [PATCH 27/27] NFSv4/flexfiles: Cancel I/O if the layout is recalled or revoked If the layout is recalled or revoked, we want to cancel I/O as quickly as possible so that we can return the layout. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 84 +++++++++++++++++++++++++- fs/nfs/pnfs.c | 9 ++- fs/nfs/pnfs.h | 9 +++ 3 files changed, 97 insertions(+), 5 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1443330ae998..1ec79ccf89ad 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1379,6 +1379,11 @@ static int ff_layout_read_prepare_common(struct rpc_task *task, return -EIO; } + if (!pnfs_is_valid_lseg(hdr->lseg)) { + rpc_exit(task, -EAGAIN); + return -EAGAIN; + } + ff_layout_read_record_layoutstats_start(task, hdr); return 0; } @@ -1559,6 +1564,11 @@ static int ff_layout_write_prepare_common(struct rpc_task *task, return -EIO; } + if (!pnfs_is_valid_lseg(hdr->lseg)) { + rpc_exit(task, -EAGAIN); + return -EAGAIN; + } + ff_layout_write_record_layoutstats_start(task, hdr); return 0; } @@ -1651,15 +1661,23 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task, set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags); } -static void ff_layout_commit_prepare_common(struct rpc_task *task, - struct nfs_commit_data *cdata) +static int ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) { + if (!pnfs_is_valid_lseg(cdata->lseg)) { + rpc_exit(task, -EAGAIN); + return -EAGAIN; + } + ff_layout_commit_record_layoutstats_start(task, cdata); + return 0; } static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { - ff_layout_commit_prepare_common(task, data); + if (ff_layout_commit_prepare_common(task, data)) + return; + rpc_call_start(task); } @@ -1955,6 +1973,65 @@ ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, ff_layout_initiate_commit); } +static bool ff_layout_match_rw(const struct rpc_task *task, + const struct nfs_pgio_header *hdr, + const struct pnfs_layout_segment *lseg) +{ + return hdr->lseg == lseg; +} + +static bool ff_layout_match_commit(const struct rpc_task *task, + const struct nfs_commit_data *cdata, + const struct pnfs_layout_segment *lseg) +{ + return cdata->lseg == lseg; +} + +static bool ff_layout_match_io(const struct rpc_task *task, const void *data) +{ + const struct rpc_call_ops *ops = task->tk_ops; + + if (ops == &ff_layout_read_call_ops_v3 || + ops == &ff_layout_read_call_ops_v4 || + ops == &ff_layout_write_call_ops_v3 || + ops == &ff_layout_write_call_ops_v4) + return ff_layout_match_rw(task, task->tk_calldata, data); + if (ops == &ff_layout_commit_call_ops_v3 || + ops == &ff_layout_commit_call_ops_v4) + return ff_layout_match_commit(task, task->tk_calldata, data); + return false; +} + +static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg) +{ + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_ff_layout_ds *mirror_ds; + struct nfs4_pnfs_ds *ds; + struct nfs_client *ds_clp; + struct rpc_clnt *clnt; + u32 idx; + + for (idx = 0; idx < flseg->mirror_array_cnt; idx++) { + mirror = flseg->mirror_array[idx]; + mirror_ds = mirror->mirror_ds; + if (!mirror_ds) + continue; + ds = mirror->mirror_ds->ds; + if (!ds) + continue; + ds_clp = ds->ds_clp; + if (!ds_clp) + continue; + clnt = ds_clp->cl_rpcclient; + if (!clnt) + continue; + if (!rpc_cancel_tasks(clnt, -EAGAIN, ff_layout_match_io, lseg)) + continue; + rpc_clnt_disconnect(clnt); + } +} + static struct pnfs_ds_commit_info * ff_layout_get_ds_info(struct inode *inode) { @@ -2512,6 +2589,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .prepare_layoutreturn = ff_layout_prepare_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cancel_io = ff_layout_cancel_io, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2613b7e36eb9..d41fc1558e91 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -710,6 +710,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, u32 seq) { struct pnfs_layout_segment *lseg, *next; + struct nfs_server *server = NFS_SERVER(lo->plh_inode); int remaining = 0; dprintk("%s:Begin lo %p\n", __func__, lo); @@ -722,8 +723,10 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, "offset %llu length %llu\n", __func__, lseg, lseg->pls_range.iomode, lseg->pls_seq, lseg->pls_range.offset, lseg->pls_range.length); - if (!mark_lseg_invalid(lseg, tmp_list)) - remaining++; + if (mark_lseg_invalid(lseg, tmp_list)) + continue; + remaining++; + pnfs_lseg_cancel_io(server, lseg); } dprintk("%s:Return %i\n", __func__, remaining); return remaining; @@ -2485,6 +2488,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, u32 seq) { struct pnfs_layout_segment *lseg, *next; + struct nfs_server *server = NFS_SERVER(lo->plh_inode); int remaining = 0; dprintk("%s:Begin lo %p\n", __func__, lo); @@ -2507,6 +2511,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, continue; remaining++; set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + pnfs_lseg_cancel_io(server, lseg); } if (remaining) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f331f067691b..e3e6a41f19de 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -169,6 +169,8 @@ struct pnfs_layoutdriver_type { void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + + void (*cancel_io)(struct pnfs_layout_segment *lseg); }; struct pnfs_commit_ops { @@ -685,6 +687,13 @@ pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page req_offset(req), req_last); } +static inline void pnfs_lseg_cancel_io(struct nfs_server *server, + struct pnfs_layout_segment *lseg) +{ + if (server->pnfs_curr_ld->cancel_io) + server->pnfs_curr_ld->cancel_io(lseg); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG