linux/net/sunrpc/clnt.c

3417 lines
80 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/net/sunrpc/clnt.c
*
* This file contains the high-level RPC interface.
* It is modeled as a finite state machine to support both synchronous
* and asynchronous requests.
*
* - RPC header generation and argument serialization.
* - Credential refresh.
* - TCP connect handling.
* - Retry of operation when it is suspected the operation failed because
* of uid squashing on the server, or when the credentials were stale
* and need to be refreshed, or when a packet was damaged in transit.
* This may be have to be moved to the VFS layer.
*
* Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
* Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/module.h>
#include <linux/types.h>
SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-05-22 01:09:41 +04:00
#include <linux/kallsyms.h>
#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/un.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/metrics.h>
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
#include <linux/sunrpc/bc_xprt.h>
#include <trace/events/sunrpc.h>
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
#include "sunrpc.h"
#include "sysfs.h"
#include "netns.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
/*
* All RPC clients are linked into this list
*/
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
static void call_start(struct rpc_task *task);
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
static void call_allocate(struct rpc_task *task);
static void call_encode(struct rpc_task *task);
static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
static void call_status(struct rpc_task *task);
static void call_transmit_status(struct rpc_task *task);
static void call_refresh(struct rpc_task *task);
static void call_refreshresult(struct rpc_task *task);
static void call_connect(struct rpc_task *task);
static void call_connect_status(struct rpc_task *task);
static int rpc_encode_header(struct rpc_task *task,
struct xdr_stream *xdr);
static int rpc_decode_header(struct rpc_task *task,
struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);
static int rpc_ping_noreply(struct rpc_clnt *clnt);
static void rpc_check_timeout(struct rpc_task *task);
static void rpc_register_client(struct rpc_clnt *clnt)
{
struct net *net = rpc_net_ns(clnt);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
spin_lock(&sn->rpc_client_lock);
list_add(&clnt->cl_clients, &sn->all_clients);
spin_unlock(&sn->rpc_client_lock);
}
static void rpc_unregister_client(struct rpc_clnt *clnt)
{
struct net *net = rpc_net_ns(clnt);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
spin_lock(&sn->rpc_client_lock);
list_del(&clnt->cl_clients);
spin_unlock(&sn->rpc_client_lock);
}
static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
{
rpc_remove_client_dir(clnt);
}
static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
{
struct net *net = rpc_net_ns(clnt);
struct super_block *pipefs_sb;
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
SUNRPC: Fix RPC client cleaned up the freed pipefs dentries RPC client pipefs dentries cleanup is in separated rpc_remove_pipedir() workqueue,which takes care about pipefs superblock locking. In some special scenarios, when kernel frees the pipefs sb of the current client and immediately alloctes a new pipefs sb, rpc_remove_pipedir function would misjudge the existence of pipefs sb which is not the one it used to hold. As a result, the rpc_remove_pipedir would clean the released freed pipefs dentries. To fix this issue, rpc_remove_pipedir should check whether the current pipefs sb is consistent with the original pipefs sb. This error can be catched by KASAN: ========================================================= [ 250.497700] BUG: KASAN: slab-use-after-free in dget_parent+0x195/0x200 [ 250.498315] Read of size 4 at addr ffff88800a2ab804 by task kworker/0:18/106503 [ 250.500549] Workqueue: events rpc_free_client_work [ 250.501001] Call Trace: [ 250.502880] kasan_report+0xb6/0xf0 [ 250.503209] ? dget_parent+0x195/0x200 [ 250.503561] dget_parent+0x195/0x200 [ 250.503897] ? __pfx_rpc_clntdir_depopulate+0x10/0x10 [ 250.504384] rpc_rmdir_depopulate+0x1b/0x90 [ 250.504781] rpc_remove_client_dir+0xf5/0x150 [ 250.505195] rpc_free_client_work+0xe4/0x230 [ 250.505598] process_one_work+0x8ee/0x13b0 ... [ 22.039056] Allocated by task 244: [ 22.039390] kasan_save_stack+0x22/0x50 [ 22.039758] kasan_set_track+0x25/0x30 [ 22.040109] __kasan_slab_alloc+0x59/0x70 [ 22.040487] kmem_cache_alloc_lru+0xf0/0x240 [ 22.040889] __d_alloc+0x31/0x8e0 [ 22.041207] d_alloc+0x44/0x1f0 [ 22.041514] __rpc_lookup_create_exclusive+0x11c/0x140 [ 22.041987] rpc_mkdir_populate.constprop.0+0x5f/0x110 [ 22.042459] rpc_create_client_dir+0x34/0x150 [ 22.042874] rpc_setup_pipedir_sb+0x102/0x1c0 [ 22.043284] rpc_client_register+0x136/0x4e0 [ 22.043689] rpc_new_client+0x911/0x1020 [ 22.044057] rpc_create_xprt+0xcb/0x370 [ 22.044417] rpc_create+0x36b/0x6c0 ... [ 22.049524] Freed by task 0: [ 22.049803] kasan_save_stack+0x22/0x50 [ 22.050165] kasan_set_track+0x25/0x30 [ 22.050520] kasan_save_free_info+0x2b/0x50 [ 22.050921] __kasan_slab_free+0x10e/0x1a0 [ 22.051306] kmem_cache_free+0xa5/0x390 [ 22.051667] rcu_core+0x62c/0x1930 [ 22.051995] __do_softirq+0x165/0x52a [ 22.052347] [ 22.052503] Last potentially related work creation: [ 22.052952] kasan_save_stack+0x22/0x50 [ 22.053313] __kasan_record_aux_stack+0x8e/0xa0 [ 22.053739] __call_rcu_common.constprop.0+0x6b/0x8b0 [ 22.054209] dentry_free+0xb2/0x140 [ 22.054540] __dentry_kill+0x3be/0x540 [ 22.054900] shrink_dentry_list+0x199/0x510 [ 22.055293] shrink_dcache_parent+0x190/0x240 [ 22.055703] do_one_tree+0x11/0x40 [ 22.056028] shrink_dcache_for_umount+0x61/0x140 [ 22.056461] generic_shutdown_super+0x70/0x590 [ 22.056879] kill_anon_super+0x3a/0x60 [ 22.057234] rpc_kill_sb+0x121/0x200 Fixes: 0157d021d23a ("SUNRPC: handle RPC client pipefs dentries by network namespace aware routines") Signed-off-by: felix <fuzhen5@huawei.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2023-10-23 04:40:19 +03:00
if (pipefs_sb == clnt->pipefs_sb)
__rpc_clnt_remove_pipedir(clnt);
rpc_put_sb_net(net);
}
}
static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
struct rpc_clnt *clnt)
{
static uint32_t clntid;
const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
struct dentry *dir, *dentry;
dir = rpc_d_lookup_sb(sb, dir_name);
if (dir == NULL) {
pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name);
return dir;
}
for (;;) {
snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
name[sizeof(name) - 1] = '\0';
dentry = rpc_create_client_dir(dir, name, clnt);
if (!IS_ERR(dentry))
break;
if (dentry == ERR_PTR(-EEXIST))
continue;
printk(KERN_INFO "RPC: Couldn't create pipefs entry"
" %s/%s, error %ld\n",
dir_name, name, PTR_ERR(dentry));
break;
}
dput(dir);
return dentry;
}
static int
rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
struct dentry *dentry;
SUNRPC: Fix RPC client cleaned up the freed pipefs dentries RPC client pipefs dentries cleanup is in separated rpc_remove_pipedir() workqueue,which takes care about pipefs superblock locking. In some special scenarios, when kernel frees the pipefs sb of the current client and immediately alloctes a new pipefs sb, rpc_remove_pipedir function would misjudge the existence of pipefs sb which is not the one it used to hold. As a result, the rpc_remove_pipedir would clean the released freed pipefs dentries. To fix this issue, rpc_remove_pipedir should check whether the current pipefs sb is consistent with the original pipefs sb. This error can be catched by KASAN: ========================================================= [ 250.497700] BUG: KASAN: slab-use-after-free in dget_parent+0x195/0x200 [ 250.498315] Read of size 4 at addr ffff88800a2ab804 by task kworker/0:18/106503 [ 250.500549] Workqueue: events rpc_free_client_work [ 250.501001] Call Trace: [ 250.502880] kasan_report+0xb6/0xf0 [ 250.503209] ? dget_parent+0x195/0x200 [ 250.503561] dget_parent+0x195/0x200 [ 250.503897] ? __pfx_rpc_clntdir_depopulate+0x10/0x10 [ 250.504384] rpc_rmdir_depopulate+0x1b/0x90 [ 250.504781] rpc_remove_client_dir+0xf5/0x150 [ 250.505195] rpc_free_client_work+0xe4/0x230 [ 250.505598] process_one_work+0x8ee/0x13b0 ... [ 22.039056] Allocated by task 244: [ 22.039390] kasan_save_stack+0x22/0x50 [ 22.039758] kasan_set_track+0x25/0x30 [ 22.040109] __kasan_slab_alloc+0x59/0x70 [ 22.040487] kmem_cache_alloc_lru+0xf0/0x240 [ 22.040889] __d_alloc+0x31/0x8e0 [ 22.041207] d_alloc+0x44/0x1f0 [ 22.041514] __rpc_lookup_create_exclusive+0x11c/0x140 [ 22.041987] rpc_mkdir_populate.constprop.0+0x5f/0x110 [ 22.042459] rpc_create_client_dir+0x34/0x150 [ 22.042874] rpc_setup_pipedir_sb+0x102/0x1c0 [ 22.043284] rpc_client_register+0x136/0x4e0 [ 22.043689] rpc_new_client+0x911/0x1020 [ 22.044057] rpc_create_xprt+0xcb/0x370 [ 22.044417] rpc_create+0x36b/0x6c0 ... [ 22.049524] Freed by task 0: [ 22.049803] kasan_save_stack+0x22/0x50 [ 22.050165] kasan_set_track+0x25/0x30 [ 22.050520] kasan_save_free_info+0x2b/0x50 [ 22.050921] __kasan_slab_free+0x10e/0x1a0 [ 22.051306] kmem_cache_free+0xa5/0x390 [ 22.051667] rcu_core+0x62c/0x1930 [ 22.051995] __do_softirq+0x165/0x52a [ 22.052347] [ 22.052503] Last potentially related work creation: [ 22.052952] kasan_save_stack+0x22/0x50 [ 22.053313] __kasan_record_aux_stack+0x8e/0xa0 [ 22.053739] __call_rcu_common.constprop.0+0x6b/0x8b0 [ 22.054209] dentry_free+0xb2/0x140 [ 22.054540] __dentry_kill+0x3be/0x540 [ 22.054900] shrink_dentry_list+0x199/0x510 [ 22.055293] shrink_dcache_parent+0x190/0x240 [ 22.055703] do_one_tree+0x11/0x40 [ 22.056028] shrink_dcache_for_umount+0x61/0x140 [ 22.056461] generic_shutdown_super+0x70/0x590 [ 22.056879] kill_anon_super+0x3a/0x60 [ 22.057234] rpc_kill_sb+0x121/0x200 Fixes: 0157d021d23a ("SUNRPC: handle RPC client pipefs dentries by network namespace aware routines") Signed-off-by: felix <fuzhen5@huawei.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2023-10-23 04:40:19 +03:00
clnt->pipefs_sb = pipefs_sb;
if (clnt->cl_program->pipe_dir_name != NULL) {
dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
return 0;
}
static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
{
if (clnt->cl_program->pipe_dir_name == NULL)
return 1;
switch (event) {
case RPC_PIPEFS_MOUNT:
if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
return 1;
if (refcount_read(&clnt->cl_count) == 0)
return 1;
break;
case RPC_PIPEFS_UMOUNT:
if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
return 1;
break;
}
return 0;
}
static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
struct dentry *dentry;
switch (event) {
case RPC_PIPEFS_MOUNT:
dentry = rpc_setup_pipedir_sb(sb, clnt);
if (!dentry)
return -ENOENT;
if (IS_ERR(dentry))
return PTR_ERR(dentry);
break;
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
break;
default:
printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event);
return -ENOTSUPP;
}
return 0;
}
static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
int error = 0;
for (;; clnt = clnt->cl_parent) {
if (!rpc_clnt_skip_event(clnt, event))
error = __rpc_clnt_handle_event(clnt, event, sb);
if (error || clnt == clnt->cl_parent)
break;
}
return error;
}
static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_clnt *clnt;
spin_lock(&sn->rpc_client_lock);
list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
if (rpc_clnt_skip_event(clnt, event))
continue;
spin_unlock(&sn->rpc_client_lock);
return clnt;
}
spin_unlock(&sn->rpc_client_lock);
return NULL;
}
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct super_block *sb = ptr;
struct rpc_clnt *clnt;
int error = 0;
while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) {
error = __rpc_pipefs_event(clnt, event, sb);
if (error)
break;
}
return error;
}
static struct notifier_block rpc_clients_block = {
.notifier_call = rpc_pipefs_event,
.priority = SUNRPC_PIPEFS_RPC_PRIO,
};
int rpc_clients_notifier_register(void)
{
return rpc_pipefs_notifier_register(&rpc_clients_block);
}
void rpc_clients_notifier_unregister(void)
{
return rpc_pipefs_notifier_unregister(&rpc_clients_block);
}
static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
const struct rpc_timeout *timeout)
{
struct rpc_xprt *old;
spin_lock(&clnt->cl_lock);
old = rcu_dereference_protected(clnt->cl_xprt,
lockdep_is_held(&clnt->cl_lock));
if (!xprt_bound(xprt))
clnt->cl_autobind = 1;
clnt->cl_timeout = timeout;
rcu_assign_pointer(clnt->cl_xprt, xprt);
spin_unlock(&clnt->cl_lock);
return old;
}
static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
{
SUNRPC: Replace strlcpy() with strscpy() strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated[1]. Additionally, it returns the size of the source string, not the resulting size of the destination string. In an effort to remove strlcpy() completely[2], replace strlcpy() here with strscpy(). Explicitly handle the truncation case by returning the size of the resulting string. If "nodename" was ever longer than sizeof(clnt->cl_nodename) - 1, this change will fix a bug where clnt->cl_nodelen would end up thinking there were more characters in clnt->cl_nodename than there actually were, which might have lead to kernel memory content exposures. Cc: Trond Myklebust <trond.myklebust@hammerspace.com> Cc: Anna Schumaker <anna@kernel.org> Cc: Chuck Lever <chuck.lever@oracle.com> Cc: Jeff Layton <jlayton@kernel.org> Cc: Neil Brown <neilb@suse.de> Cc: Olga Kornievskaia <kolga@netapp.com> Cc: Dai Ngo <Dai.Ngo@oracle.com> Cc: Tom Talpey <tom@talpey.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Eric Dumazet <edumazet@google.com> Cc: Jakub Kicinski <kuba@kernel.org> Cc: Paolo Abeni <pabeni@redhat.com> Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [1] Link: https://github.com/KSPP/linux/issues/89 [2] Co-developed-by: Azeem Shaikh <azeemshaikh38@gmail.com> Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com> Reviewed-by: NeilBrown <neilb@suse.de> Link: https://lore.kernel.org/r/20231114175407.work.410-kees@kernel.org Signed-off-by: Kees Cook <keescook@chromium.org>
2023-11-14 20:54:18 +03:00
ssize_t copied;
copied = strscpy(clnt->cl_nodename,
nodename, sizeof(clnt->cl_nodename));
clnt->cl_nodelen = copied < 0
? sizeof(clnt->cl_nodename) - 1
: copied;
}
static int rpc_client_register(struct rpc_clnt *clnt,
rpc_authflavor_t pseudoflavor,
const char *client_name)
{
struct rpc_auth_create_args auth_args = {
.pseudoflavor = pseudoflavor,
.target_name = client_name,
};
struct rpc_auth *auth;
struct net *net = rpc_net_ns(clnt);
struct super_block *pipefs_sb;
int err;
rpc_clnt_debugfs_register(clnt);
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
err = rpc_setup_pipedir(pipefs_sb, clnt);
if (err)
goto out;
}
rpc_register_client(clnt);
if (pipefs_sb)
rpc_put_sb_net(net);
auth = rpcauth_create(&auth_args, clnt);
if (IS_ERR(auth)) {
dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
pseudoflavor);
err = PTR_ERR(auth);
goto err_auth;
}
return 0;
err_auth:
pipefs_sb = rpc_get_sb_net(net);
rpc_unregister_client(clnt);
__rpc_clnt_remove_pipedir(clnt);
out:
if (pipefs_sb)
rpc_put_sb_net(net);
rpc_sysfs_client_destroy(clnt);
rpc_clnt_debugfs_unregister(clnt);
return err;
}
static DEFINE_IDA(rpc_clids);
void rpc_cleanup_clids(void)
{
ida_destroy(&rpc_clids);
}
static int rpc_alloc_clid(struct rpc_clnt *clnt)
{
int clid;
clid = ida_alloc(&rpc_clids, GFP_KERNEL);
if (clid < 0)
return clid;
clnt->cl_clid = clid;
return 0;
}
static void rpc_free_clid(struct rpc_clnt *clnt)
{
ida_free(&rpc_clids, clnt->cl_clid);
}
static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
struct rpc_xprt_switch *xps,
struct rpc_xprt *xprt,
struct rpc_clnt *parent)
{
const struct rpc_program *program = args->program;
const struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
const struct rpc_timeout *timeout;
const char *nodename = args->nodename;
int err;
err = rpciod_up();
if (err)
goto out_no_rpciod;
err = -EINVAL;
if (args->version >= program->nrvers)
goto out_err;
version = program->version[args->version];
if (version == NULL)
goto out_err;
err = -ENOMEM;
clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
goto out_err;
clnt->cl_parent = parent ? : clnt;
clnt->cl_xprtsec = args->xprtsec;
err = rpc_alloc_clid(clnt);
if (err)
goto out_no_clid;
clnt->cl_cred = get_cred(args->cred);
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = args->stats ? : program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
if (clnt->cl_metrics == NULL)
goto out_no_stats;
clnt->cl_program = program;
INIT_LIST_HEAD(&clnt->cl_tasks);
spin_lock_init(&clnt->cl_lock);
timeout = xprt->timeout;
if (args->timeout != NULL) {
memcpy(&clnt->cl_timeout_default, args->timeout,
sizeof(clnt->cl_timeout_default));
timeout = &clnt->cl_timeout_default;
}
rpc_clnt_set_transport(clnt, xprt, timeout);
xprt->main = true;
xprt_iter_init(&clnt->cl_xpi, xps);
xprt_switch_put(xps);
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
refcount_set(&clnt->cl_count, 1);
if (nodename == NULL)
nodename = utsname()->nodename;
/* save the nodename */
rpc_clnt_set_nodename(clnt, nodename);
rpc_sysfs_client_setup(clnt, xps, rpc_net_ns(clnt));
err = rpc_client_register(clnt, args->authflavor, args->client_name);
if (err)
goto out_no_path;
if (parent)
refcount_inc(&parent->cl_count);
trace_rpc_clnt_new(clnt, xprt, args);
return clnt;
out_no_path:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
put_cred(clnt->cl_cred);
rpc_free_clid(clnt);
out_no_clid:
kfree(clnt);
out_err:
rpciod_down();
out_no_rpciod:
xprt_switch_put(xps);
xprt_put(xprt);
trace_rpc_clnt_new_err(program->name, args->servername, err);
return ERR_PTR(err);
}
static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt)
{
struct rpc_clnt *clnt = NULL;
struct rpc_xprt_switch *xps;
if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
xps = args->bc_xprt->xpt_bc_xps;
xprt_switch_get(xps);
} else {
xps = xprt_switch_alloc(xprt, GFP_KERNEL);
if (xps == NULL) {
xprt_put(xprt);
return ERR_PTR(-ENOMEM);
}
if (xprt->bc_xprt) {
xprt_switch_get(xps);
xprt->bc_xprt->xpt_bc_xps = xps;
}
}
clnt = rpc_new_client(args, xps, xprt, NULL);
if (IS_ERR(clnt))
return clnt;
if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
int err = rpc_ping(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
} else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
int err = rpc_ping_noreply(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
}
clnt->cl_softrtry = 1;
if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) {
clnt->cl_softrtry = 0;
if (args->flags & RPC_CLNT_CREATE_SOFTERR)
clnt->cl_softerr = 1;
}
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
if (args->flags & RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT)
clnt->cl_noretranstimeo = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
clnt->cl_chatty = 1;
return clnt;
}
/**
* rpc_create - create an RPC client and transport with one call
* @args: rpc_clnt create argument structure
*
* Creates and initializes an RPC transport and an RPC client.
*
* It can ping the server in order to determine if it is up, and to see if
* it supports this program and version. RPC_CLNT_CREATE_NOPING disables
* this behavior so asynchronous tasks can also use rpc_create.
*/
struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
struct xprt_create xprtargs = {
.net = args->net,
.ident = args->protocol,
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
.servername = args->servername,
.bc_xprt = args->bc_xprt,
.xprtsec = args->xprtsec,
.connect_timeout = args->connect_timeout,
.reconnect_timeout = args->reconnect_timeout,
};
char servername[48];
struct rpc_clnt *clnt;
int i;
if (args->bc_xprt) {
WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
xprt = args->bc_xprt->xpt_bc_xprt;
if (xprt) {
xprt_get(xprt);
return rpc_create_xprt(args, xprt);
}
}
if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
/*
* If the caller chooses not to specify a hostname, whip
* up a string representation of the passed-in address.
*/
if (xprtargs.servername == NULL) {
struct sockaddr_un *sun =
(struct sockaddr_un *)args->address;
struct sockaddr_in *sin =
(struct sockaddr_in *)args->address;
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)args->address;
servername[0] = '\0';
switch (args->address->sa_family) {
case AF_LOCAL:
if (sun->sun_path[0])
snprintf(servername, sizeof(servername), "%s",
sun->sun_path);
else
snprintf(servername, sizeof(servername), "@%s",
sun->sun_path+1);
break;
case AF_INET:
snprintf(servername, sizeof(servername), "%pI4",
&sin->sin_addr.s_addr);
break;
case AF_INET6:
snprintf(servername, sizeof(servername), "%pI6",
&sin6->sin6_addr);
break;
default:
/* caller wants default server name, but
* address family isn't recognized. */
return ERR_PTR(-EINVAL);
}
xprtargs.servername = servername;
}
xprt = xprt_create_transport(&xprtargs);
if (IS_ERR(xprt))
return (struct rpc_clnt *)xprt;
/*
* By default, kernel RPC client connects from a reserved port.
* CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
* but it is always enabled for rpciod, which handles the connect
* operation.
*/
xprt->resvport = 1;
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
xprt->reuseport = 0;
if (args->flags & RPC_CLNT_CREATE_REUSEPORT)
xprt->reuseport = 1;
clnt = rpc_create_xprt(args, xprt);
if (IS_ERR(clnt) || args->nconnect <= 1)
return clnt;
for (i = 0; i < args->nconnect - 1; i++) {
if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0)
break;
}
return clnt;
}
EXPORT_SYMBOL_GPL(rpc_create);
/*
* This function clones the RPC client structure. It allows us to share the
* same transport while varying parameters such as the authentication
* flavour.
*/
static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
struct rpc_clnt *clnt)
{
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
struct rpc_clnt *new;
int err;
err = -ENOMEM;
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
rcu_read_unlock();
if (xprt == NULL || xps == NULL) {
xprt_put(xprt);
xprt_switch_put(xps);
goto out_err;
}
args->servername = xprt->servername;
args->nodename = clnt->cl_nodename;
new = rpc_new_client(args, xps, xprt, clnt);
if (IS_ERR(new))
return new;
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
new->cl_softerr = clnt->cl_softerr;
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
new->cl_max_connect = clnt->cl_max_connect;
return new;
out_err:
trace_rpc_clnt_clone_err(clnt, err);
return ERR_PTR(err);
}
/**
* rpc_clone_client - Clone an RPC client structure
*
* @clnt: RPC client whose parameters are copied
*
* Returns a fresh RPC client or an ERR_PTR.
*/
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
{
struct rpc_create_args args = {
.program = clnt->cl_program,
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
.cred = clnt->cl_cred,
.stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
EXPORT_SYMBOL_GPL(rpc_clone_client);
/**
* rpc_clone_client_set_auth - Clone an RPC client structure and set its auth
*
* @clnt: RPC client whose parameters are copied
* @flavor: security flavor for new client
*
* Returns a fresh RPC client or an ERR_PTR.
*/
struct rpc_clnt *
rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
{
struct rpc_create_args args = {
.program = clnt->cl_program,
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = flavor,
.cred = clnt->cl_cred,
.stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth);
/**
* rpc_switch_client_transport: switch the RPC transport on the fly
* @clnt: pointer to a struct rpc_clnt
* @args: pointer to the new transport arguments
* @timeout: pointer to the new timeout parameters
*
* This function allows the caller to switch the RPC transport for the
* rpc_clnt structure 'clnt' to allow it to connect to a mirrored NFS
* server, for instance. It assumes that the caller has ensured that
* there are no active RPC tasks by using some form of locking.
*
* Returns zero if "clnt" is now using the new xprt. Otherwise a
* negative errno is returned, and "clnt" continues to use the old
* xprt.
*/
int rpc_switch_client_transport(struct rpc_clnt *clnt,
struct xprt_create *args,
const struct rpc_timeout *timeout)
{
const struct rpc_timeout *old_timeo;
rpc_authflavor_t pseudoflavor;
struct rpc_xprt_switch *xps, *oldxps;
struct rpc_xprt *xprt, *old;
struct rpc_clnt *parent;
int err;
args->xprtsec = clnt->cl_xprtsec;
xprt = xprt_create_transport(args);
if (IS_ERR(xprt))
return PTR_ERR(xprt);
xps = xprt_switch_alloc(xprt, GFP_KERNEL);
if (xps == NULL) {
xprt_put(xprt);
return -ENOMEM;
}
pseudoflavor = clnt->cl_auth->au_flavor;
old_timeo = clnt->cl_timeout;
old = rpc_clnt_set_transport(clnt, xprt, timeout);
oldxps = xprt_iter_xchg_switch(&clnt->cl_xpi, xps);
rpc_unregister_client(clnt);
__rpc_clnt_remove_pipedir(clnt);
rpc_sysfs_client_destroy(clnt);
rpc_clnt_debugfs_unregister(clnt);
/*
* A new transport was created. "clnt" therefore
* becomes the root of a new cl_parent tree. clnt's
* children, if it has any, still point to the old xprt.
*/
parent = clnt->cl_parent;
clnt->cl_parent = clnt;
/*
* The old rpc_auth cache cannot be re-used. GSS
* contexts in particular are between a single
* client and server.
*/
err = rpc_client_register(clnt, pseudoflavor, NULL);
if (err)
goto out_revert;
synchronize_rcu();
if (parent != clnt)
rpc_release_client(parent);
xprt_switch_put(oldxps);
xprt_put(old);
trace_rpc_clnt_replace_xprt(clnt);
return 0;
out_revert:
xps = xprt_iter_xchg_switch(&clnt->cl_xpi, oldxps);
rpc_clnt_set_transport(clnt, old, old_timeo);
clnt->cl_parent = parent;
rpc_client_register(clnt, pseudoflavor, NULL);
xprt_switch_put(xps);
xprt_put(xprt);
trace_rpc_clnt_replace_xprt_err(clnt);
return err;
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt)
{
struct rpc_xprt_switch *xps;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
rcu_read_unlock();
return xps;
}
static
int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
{
struct rpc_xprt_switch *xps;
xps = rpc_clnt_xprt_switch_get(clnt);
if (xps == NULL)
return -EAGAIN;
func(xpi, xps);
xprt_switch_put(xps);
return 0;
}
static
int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
{
return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listall);
}
static
int rpc_clnt_xprt_iter_offline_init(struct rpc_clnt *clnt,
struct rpc_xprt_iter *xpi)
{
return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listoffline);
}
/**
* rpc_clnt_iterate_for_each_xprt - Apply a function to all transports
* @clnt: pointer to client
* @fn: function to apply
* @data: void pointer to function data
*
* Iterates through the list of RPC transports currently attached to the
* client and applies the function fn(clnt, xprt, data).
*
* On error, the iteration stops, and the function returns the error value.
*/
int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt,
int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *),
void *data)
{
struct rpc_xprt_iter xpi;
int ret;
ret = rpc_clnt_xprt_iter_init(clnt, &xpi);
if (ret)
return ret;
for (;;) {
struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
if (!xprt)
break;
ret = fn(clnt, xprt, data);
xprt_put(xprt);
if (ret < 0)
break;
}
xprt_iter_destroy(&xpi);
return ret;
}
EXPORT_SYMBOL_GPL(rpc_clnt_iterate_for_each_xprt);
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
*/
void rpc_killall_tasks(struct rpc_clnt *clnt)
{
struct rpc_task *rovr;
if (list_empty(&clnt->cl_tasks))
return;
/*
* Spin lock all_tasks to prevent changes...
*/
trace_rpc_clnt_killall(clnt);
spin_lock(&clnt->cl_lock);
list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
rpc_signal_task(rovr);
spin_unlock(&clnt->cl_lock);
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
/**
* rpc_cancel_tasks - try to cancel a set of RPC tasks
* @clnt: Pointer to RPC client
* @error: RPC task error value to set
* @fnmatch: Pointer to selector function
* @data: User data
*
* Uses @fnmatch to define a set of RPC tasks that are to be cancelled.
* The argument @error must be a negative error value.
*/
unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
bool (*fnmatch)(const struct rpc_task *,
const void *),
const void *data)
{
struct rpc_task *task;
unsigned long count = 0;
if (list_empty(&clnt->cl_tasks))
return 0;
/*
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&clnt->cl_lock);
list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
if (!RPC_IS_ACTIVATED(task))
continue;
if (!fnmatch(task, data))
continue;
rpc_task_try_cancel(task, error);
count++;
}
spin_unlock(&clnt->cl_lock);
return count;
}
EXPORT_SYMBOL_GPL(rpc_cancel_tasks);
static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt,
struct rpc_xprt *xprt, void *dummy)
{
if (xprt_connected(xprt))
xprt_force_disconnect(xprt);
return 0;
}
void rpc_clnt_disconnect(struct rpc_clnt *clnt)
{
rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_disconnect_xprt, NULL);
}
EXPORT_SYMBOL_GPL(rpc_clnt_disconnect);
/*
* Properly shut down an RPC client, terminating all outstanding
* requests.
*/
void rpc_shutdown_client(struct rpc_clnt *clnt)
{
might_sleep();
trace_rpc_clnt_shutdown(clnt);
while (!list_empty(&clnt->cl_tasks)) {
rpc_killall_tasks(clnt);
wait_event_timeout(destroy_wait,
list_empty(&clnt->cl_tasks), 1*HZ);
}
rpc_release_client(clnt);
}
EXPORT_SYMBOL_GPL(rpc_shutdown_client);
/*
* Free an RPC client
*/
SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2020-04-03 06:33:41 +03:00
static void rpc_free_client_work(struct work_struct *work)
{
struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work);
trace_rpc_clnt_free(clnt);
SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2020-04-03 06:33:41 +03:00
/* These might block on processes that might allocate memory,
* so they cannot be called in rpciod, so they are handled separately
* here.
*/
rpc_sysfs_client_destroy(clnt);
SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2020-04-03 06:33:41 +03:00
rpc_clnt_debugfs_unregister(clnt);
rpc_free_clid(clnt);
SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2020-04-03 06:33:41 +03:00
rpc_clnt_remove_pipedir(clnt);
xprt_put(rcu_dereference_raw(clnt->cl_xprt));
SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2020-04-03 06:33:41 +03:00
kfree(clnt);
rpciod_down();
}
static struct rpc_clnt *
rpc_free_client(struct rpc_clnt *clnt)
{
struct rpc_clnt *parent = NULL;
trace_rpc_clnt_release(clnt);
if (clnt->cl_parent != clnt)
parent = clnt->cl_parent;
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
xprt_iter_destroy(&clnt->cl_xpi);
put_cred(clnt->cl_cred);
SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2020-04-03 06:33:41 +03:00
INIT_WORK(&clnt->cl_work, rpc_free_client_work);
schedule_work(&clnt->cl_work);
return parent;
}
/*
* Free an RPC client
*/
static struct rpc_clnt *
rpc_free_auth(struct rpc_clnt *clnt)
{
/*
* Note: RPCSEC_GSS may need to send NULL RPC calls in order to
* release remaining GSS contexts. This mechanism ensures
* that it can do so safely.
*/
if (clnt->cl_auth != NULL) {
rpcauth_release(clnt->cl_auth);
clnt->cl_auth = NULL;
}
if (refcount_dec_and_test(&clnt->cl_count))
return rpc_free_client(clnt);
return NULL;
}
/*
* Release reference to the RPC client
*/
void
rpc_release_client(struct rpc_clnt *clnt)
{
do {
if (list_empty(&clnt->cl_tasks))
wake_up(&destroy_wait);
if (refcount_dec_not_one(&clnt->cl_count))
break;
clnt = rpc_free_auth(clnt);
} while (clnt != NULL);
}
EXPORT_SYMBOL_GPL(rpc_release_client);
/**
* rpc_bind_new_program - bind a new RPC program to an existing client
* @old: old rpc_client
* @program: rpc program to set
* @vers: rpc program version
*
* Clones the rpc client and sets up a new RPC program. This is mainly
* of use for enabling different RPC programs to share the same transport.
* The Sun NFSv2/v3 ACL protocol can do this.
*/
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
const struct rpc_program *program,
u32 vers)
{
struct rpc_create_args args = {
.program = program,
.prognumber = program->number,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
.stats = old->cl_stats,
.timeout = old->cl_timeout,
};
struct rpc_clnt *clnt;
int err;
clnt = __rpc_clone_client(&args, old);
if (IS_ERR(clnt))
goto out;
err = rpc_ping(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
clnt = ERR_PTR(err);
}
out:
return clnt;
}
EXPORT_SYMBOL_GPL(rpc_bind_new_program);
struct rpc_xprt *
rpc_task_get_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
if (!xprt)
return NULL;
rcu_read_lock();
xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
atomic_long_inc(&xps->xps_queuelen);
rcu_read_unlock();
atomic_long_inc(&xprt->queuelen);
return xprt;
}
static void
rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
atomic_long_dec(&xprt->queuelen);
rcu_read_lock();
xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
atomic_long_dec(&xps->xps_queuelen);
rcu_read_unlock();
xprt_put(xprt);
}
void rpc_task_release_transport(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
if (xprt) {
task->tk_xprt = NULL;
if (task->tk_client)
rpc_task_release_xprt(task->tk_client, xprt);
else
xprt_put(xprt);
}
}
EXPORT_SYMBOL_GPL(rpc_task_release_transport);
void rpc_task_release_client(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
rpc_task_release_transport(task);
if (clnt != NULL) {
/* Remove from client task list */
spin_lock(&clnt->cl_lock);
list_del(&task->tk_task);
spin_unlock(&clnt->cl_lock);
task->tk_client = NULL;
rpc_release_client(clnt);
}
}
static struct rpc_xprt *
rpc_task_get_first_xprt(struct rpc_clnt *clnt)
{
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
return rpc_task_get_xprt(clnt, xprt);
}
static struct rpc_xprt *
rpc_task_get_next_xprt(struct rpc_clnt *clnt)
{
return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi));
}
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
if (task->tk_xprt) {
if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
(task->tk_flags & RPC_TASK_MOVEABLE)))
return;
xprt_release(task);
xprt_put(task->tk_xprt);
}
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
else
task->tk_xprt = rpc_task_get_next_xprt(clnt);
}
static
void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
rpc_task_set_transport(task, clnt);
task->tk_client = clnt;
refcount_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_softerr)
task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
spin_unlock(&clnt->cl_lock);
}
static void
rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
{
if (msg != NULL) {
task->tk_msg.rpc_proc = msg->rpc_proc;
task->tk_msg.rpc_argp = msg->rpc_argp;
task->tk_msg.rpc_resp = msg->rpc_resp;
task->tk_msg.rpc_cred = msg->rpc_cred;
if (!(task->tk_flags & RPC_TASK_CRED_NOREF))
get_cred(task->tk_msg.rpc_cred);
}
}
/*
* Default callback for async RPC calls
*/
static void
rpc_default_callback(struct rpc_task *task, void *data)
{
}
static const struct rpc_call_ops rpc_default_ops = {
.rpc_call_done = rpc_default_callback,
};
/**
* rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
* @task_setup_data: pointer to task initialisation data
*/
struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
{
struct rpc_task *task;
task = rpc_new_task(task_setup_data);
if (IS_ERR(task))
return task;
if (!RPC_IS_ASYNC(task))
task->tk_flags |= RPC_TASK_CRED_NOREF;
rpc_task_set_client(task, task_setup_data->rpc_client);
rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
if (task->tk_action == NULL)
rpc_call_start(task);
atomic_inc(&task->tk_count);
rpc_execute(task);
return task;
}
EXPORT_SYMBOL_GPL(rpc_run_task);
/**
* rpc_call_sync - Perform a synchronous RPC call
* @clnt: pointer to RPC client
* @msg: RPC call parameters
* @flags: RPC call flags
*/
int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_message = msg,
.callback_ops = &rpc_default_ops,
.flags = flags,
};
int status;
WARN_ON_ONCE(flags & RPC_TASK_ASYNC);
if (flags & RPC_TASK_ASYNC) {
rpc_release_calldata(task_setup_data.callback_ops,
task_setup_data.callback_data);
return -EINVAL;
}
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
return status;
}
EXPORT_SYMBOL_GPL(rpc_call_sync);
/**
* rpc_call_async - Perform an asynchronous RPC call
* @clnt: pointer to RPC client
* @msg: RPC call parameters
* @flags: RPC call flags
* @tk_ops: RPC call ops
* @data: user call data
*/
int
rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
const struct rpc_call_ops *tk_ops, void *data)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_message = msg,
.callback_ops = tk_ops,
.callback_data = data,
.flags = flags|RPC_TASK_ASYNC,
};
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
return 0;
}
EXPORT_SYMBOL_GPL(rpc_call_async);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static void call_bc_encode(struct rpc_task *task);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
* @timeout: timeout values to use for this task
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
*/
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
struct rpc_timeout *timeout)
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
.callback_ops = &rpc_default_ops,
.flags = RPC_TASK_SOFTCONN |
RPC_TASK_NO_RETRANS_TIMEOUT,
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
/*
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
if (IS_ERR(task)) {
xprt_free_bc_request(req);
return task;
}
xprt_init_bc_request(req, task, timeout);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
task->tk_action = call_bc_encode;
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
atomic_inc(&task->tk_count);
WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
rpc_execute(task);
dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
return task;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
/**
* rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages
* @req: RPC request to prepare
* @pages: vector of struct page pointers
* @base: offset in first page where receive should start, in bytes
* @len: expected size of the upper layer data payload, in bytes
* @hdrsize: expected size of upper layer reply header, in XDR words
*
*/
void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
unsigned int base, unsigned int len,
unsigned int hdrsize)
{
hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign;
xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
trace_rpc_xdr_reply_pages(req->rq_task, &req->rq_rcv_buf);
}
EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
void
rpc_call_start(struct rpc_task *task)
{
task->tk_action = call_start;
}
EXPORT_SYMBOL_GPL(rpc_call_start);
/**
* rpc_peeraddr - extract remote peer address from clnt's xprt
* @clnt: RPC client structure
* @buf: target buffer
* @bufsize: length of target buffer
*
* Returns the number of bytes that are actually in the stored address.
*/
size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
{
size_t bytes;
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
bytes = xprt->addrlen;
if (bytes > bufsize)
bytes = bufsize;
memcpy(buf, &xprt->addr, bytes);
rcu_read_unlock();
return bytes;
}
EXPORT_SYMBOL_GPL(rpc_peeraddr);
/**
* rpc_peeraddr2str - return remote peer address in printable format
* @clnt: RPC client structure
* @format: address format
*
* NB: the lifetime of the memory referenced by the returned pointer is
* the same as the rpc_xprt itself. As long as the caller uses this
* pointer, it must hold the RCU read lock.
*/
const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
enum rpc_display_format_t format)
{
struct rpc_xprt *xprt;
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt->address_strings[format] != NULL)
return xprt->address_strings[format];
else
return "unprintable";
}
EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
static const struct sockaddr_in rpc_inaddr_loopback = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
};
static const struct sockaddr_in6 rpc_in6addr_loopback = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
};
/*
* Try a getsockname() on a connected datagram socket. Using a
* connected datagram socket prevents leaving a socket in TIME_WAIT.
* This conserves the ephemeral port number space.
*
* Returns zero and fills in "buf" if successful; otherwise, a
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-12 22:00:20 +03:00
struct sockaddr *buf)
{
struct socket *sock;
int err;
err = __sock_create(net, sap->sa_family,
SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
if (err < 0) {
dprintk("RPC: can't create UDP socket (%d)\n", err);
goto out;
}
switch (sap->sa_family) {
case AF_INET:
err = kernel_bind(sock,
(struct sockaddr *)&rpc_inaddr_loopback,
sizeof(rpc_inaddr_loopback));
break;
case AF_INET6:
err = kernel_bind(sock,
(struct sockaddr *)&rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
break;
default:
err = -EAFNOSUPPORT;
goto out_release;
}
if (err < 0) {
dprintk("RPC: can't bind UDP socket (%d)\n", err);
goto out_release;
}
err = kernel_connect(sock, sap, salen, 0);
if (err < 0) {
dprintk("RPC: can't connect UDP socket (%d)\n", err);
goto out_release;
}
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-12 22:00:20 +03:00
err = kernel_getsockname(sock, buf);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
}
err = 0;
if (buf->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)buf;
sin6->sin6_scope_id = 0;
}
dprintk("RPC: %s succeeded\n", __func__);
out_release:
sock_release(sock);
out:
return err;
}
/*
* Scraping a connected socket failed, so we don't have a useable
* local address. Fallback: generate an address that will prevent
* the server from calling us back.
*
* Returns zero and fills in "buf" if successful; otherwise, a
* negative errno is returned.
*/
static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
{
switch (family) {
case AF_INET:
if (buflen < sizeof(rpc_inaddr_loopback))
return -EINVAL;
memcpy(buf, &rpc_inaddr_loopback,
sizeof(rpc_inaddr_loopback));
break;
case AF_INET6:
if (buflen < sizeof(rpc_in6addr_loopback))
return -EINVAL;
memcpy(buf, &rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
break;
default:
dprintk("RPC: %s: address family not supported\n",
__func__);
return -EAFNOSUPPORT;
}
dprintk("RPC: %s: succeeded\n", __func__);
return 0;
}
/**
* rpc_localaddr - discover local endpoint address for an RPC client
* @clnt: RPC client structure
* @buf: target buffer
* @buflen: size of target buffer, in bytes
*
* Returns zero and fills in "buf" and "buflen" if successful;
* otherwise, a negative errno is returned.
*
* This works even if the underlying transport is not currently connected,
* or if the upper layer never previously provided a source address.
*
* The result of this function call is transient: multiple calls in
* succession may give different results, depending on how local
* networking configuration changes over time.
*/
int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
{
struct sockaddr_storage address;
struct sockaddr *sap = (struct sockaddr *)&address;
struct rpc_xprt *xprt;
struct net *net;
size_t salen;
int err;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
salen = xprt->addrlen;
memcpy(sap, &xprt->addr, salen);
net = get_net(xprt->xprt_net);
rcu_read_unlock();
rpc_set_port(sap, 0);
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-12 22:00:20 +03:00
err = rpc_sockname(net, sap, salen, buf);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
return rpc_anyaddr(sap->sa_family, buf, buflen);
return 0;
}
EXPORT_SYMBOL_GPL(rpc_localaddr);
void
rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
{
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt->ops->set_buffer_size)
xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rpc_setbufsize);
/**
* rpc_net_ns - Get the network namespace for this RPC client
* @clnt: RPC client to query
*
*/
struct net *rpc_net_ns(struct rpc_clnt *clnt)
{
struct net *ret;
rcu_read_lock();
ret = rcu_dereference(clnt->cl_xprt)->xprt_net;
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_net_ns);
/**
* rpc_max_payload - Get maximum payload size for a transport, in bytes
* @clnt: RPC client to query
*
* For stream transports, this is one RPC record fragment (see RFC
* 1831), as we don't support multi-record requests yet. For datagram
* transports, this is the size of an IP packet minus the IP, UDP, and
* RPC header sizes.
*/
size_t rpc_max_payload(struct rpc_clnt *clnt)
{
size_t ret;
rcu_read_lock();
ret = rcu_dereference(clnt->cl_xprt)->max_payload;
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_max_payload);
/**
* rpc_max_bc_payload - Get maximum backchannel payload size, in bytes
* @clnt: RPC client to query
*/
size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
{
struct rpc_xprt *xprt;
size_t ret;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
ret = xprt->ops->bc_maxpayload(xprt);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt)
{
struct rpc_xprt *xprt;
unsigned int ret;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
ret = xprt->ops->bc_num_slots(xprt);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_num_bc_slots);
/**
* rpc_force_rebind - force transport to check that remote port is unchanged
* @clnt: client to rebind
*
*/
void rpc_force_rebind(struct rpc_clnt *clnt)
{
if (clnt->cl_autobind) {
rcu_read_lock();
xprt_clear_bound(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
}
}
EXPORT_SYMBOL_GPL(rpc_force_rebind);
static int
__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *))
{
task->tk_status = 0;
task->tk_rpc_status = 0;
task->tk_action = action;
return 1;
}
/*
* Restart an (async) RPC call. Usually called from within the
* exit handler.
*/
int
rpc_restart_call(struct rpc_task *task)
{
return __rpc_restart_call(task, call_start);
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
/*
* Restart an (async) RPC call from the call_prepare state.
* Usually called from within the exit handler.
*/
int
rpc_restart_call_prepare(struct rpc_task *task)
{
if (task->tk_ops->rpc_call_prepare != NULL)
return __rpc_restart_call(task, rpc_prepare_task);
return rpc_restart_call(task);
}
EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
const char
*rpc_proc_name(const struct rpc_task *task)
{
const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
if (proc) {
if (proc->p_name)
return proc->p_name;
else
return "NULL";
} else
return "no proc";
}
static void
__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
{
trace_rpc_call_rpcerror(task, tk_status, rpc_status);
rpc_task_set_rpc_status(task, rpc_status);
rpc_exit(task, tk_status);
}
static void
rpc_call_rpcerror(struct rpc_task *task, int status)
{
__rpc_call_rpcerror(task, status, status);
}
/*
* 0. Initial state
*
* Other FSM states can be visited zero or more times, but
* this state is visited exactly once for each RPC.
*/
static void
call_start(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
int idx = task->tk_msg.rpc_proc->p_statidx;
trace_rpc_request(task);
if (task->tk_client->cl_shutdown) {
rpc_call_rpcerror(task, -EIO);
return;
}
/* Increment call count (version might not be valid for ping) */
if (clnt->cl_program->version[clnt->cl_vers])
clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
rpc_task_set_transport(task, clnt);
}
/*
* 1. Reserve an RPC call slot
*/
static void
call_reserve(struct rpc_task *task)
{
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
}
static void call_retry_reserve(struct rpc_task *task);
/*
* 1b. Grok the result of xprt_reserve()
*/
static void
call_reserveresult(struct rpc_task *task)
{
int status = task->tk_status;
/*
* After a call to xprt_reserve(), we must have either
* a request slot or else an error status.
*/
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
task->tk_action = call_refresh;
return;
}
rpc_call_rpcerror(task, -EIO);
return;
}
switch (status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
fallthrough;
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
return;
default:
rpc_call_rpcerror(task, status);
}
}
/*
* 1c. Retry reserving an RPC call slot
*/
static void
call_retry_reserve(struct rpc_task *task)
{
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_retry_reserve(task);
}
/*
* 2. Bind and/or refresh the credentials
*/
static void
call_refresh(struct rpc_task *task)
{
task->tk_action = call_refreshresult;
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
rpcauth_refreshcred(task);
}
/*
* 2a. Process the results of a credential refresh
*/
static void
call_refreshresult(struct rpc_task *task)
{
int status = task->tk_status;
task->tk_status = 0;
task->tk_action = call_refresh;
switch (status) {
case 0:
sunrpc: Fix infinite loop in RPC state machine When a task enters call_refreshresult with status 0 from call_refresh and !rpcauth_uptodatecred(task) it enters call_refresh again with no rate-limiting or max number of retries. Instead of trying forever, make use of the retry path that other errors use. This only seems to be possible when the crrefresh callback is gss_refresh_null, which only happens when destroying the context. To reproduce: 1) mount with sec=krb5 (or sec=sys with krb5 negotiated for non FSID specific operations). 2) reboot - the client will be stuck and will need to be hard rebooted BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:2:46] Modules linked in: rpcsec_gss_krb5 nfsv4 nfs fscache ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw i2c_piix4 i2c_core e1000 parport_pc parport shpchp nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc autofs4 mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy irq event stamp: 195724 hardirqs last enabled at (195723): [<ffffffff814a925c>] restore_args+0x0/0x30 hardirqs last disabled at (195724): [<ffffffff814b0a6a>] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (195722): [<ffffffff8103f583>] __do_softirq+0x1df/0x276 softirqs last disabled at (195717): [<ffffffff8103f852>] irq_exit+0x53/0x9a CPU: 0 PID: 46 Comm: kworker/0:2 Not tainted 3.13.0-rc3-branch-dros_testing+ #4 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] task: ffff8800799c4260 ti: ffff880079002000 task.ti: ffff880079002000 RIP: 0010:[<ffffffffa0064fd4>] [<ffffffffa0064fd4>] __rpc_execute+0x8a/0x362 [sunrpc] RSP: 0018:ffff880079003d18 EFLAGS: 00000246 RAX: 0000000000000005 RBX: 0000000000000007 RCX: 0000000000000007 RDX: 0000000000000007 RSI: ffff88007aecbae8 RDI: ffff8800783d8900 RBP: ffff880079003d78 R08: ffff88006e30e9f8 R09: ffffffffa005a3d7 R10: ffff88006e30e7b0 R11: ffff8800783d8900 R12: ffffffffa006675e R13: ffff880079003ce8 R14: ffff88006e30e7b0 R15: ffff8800783d8900 FS: 0000000000000000(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3072333000 CR3: 0000000001a0b000 CR4: 00000000001407f0 Stack: ffff880079003d98 0000000000000246 0000000000000000 ffff88007a9a4830 ffff880000000000 ffffffff81073f47 ffff88007f212b00 ffff8800799c4260 ffff8800783d8988 ffff88007f212b00 ffffe8ffff604800 0000000000000000 Call Trace: [<ffffffff81073f47>] ? trace_hardirqs_on_caller+0x145/0x1a1 [<ffffffffa00652d3>] rpc_async_schedule+0x27/0x32 [sunrpc] [<ffffffff81052974>] process_one_work+0x211/0x3a5 [<ffffffff810528d5>] ? process_one_work+0x172/0x3a5 [<ffffffff81052eeb>] worker_thread+0x134/0x202 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff810584a0>] kthread+0xc9/0xd1 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 [<ffffffff814afd6c>] ret_from_fork+0x7c/0xb0 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 Code: e8 87 63 fd e0 c6 05 10 dd 01 00 01 48 8b 43 70 4c 8d 6b 70 45 31 e4 a8 02 0f 85 d5 02 00 00 4c 8b 7b 48 48 c7 43 48 00 00 00 00 <4c> 8b 4b 50 4d 85 ff 75 0c 4d 85 c9 4d 89 cf 0f 84 32 01 00 00 And the output of "rpcdebug -m rpc -s all": RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 Signed-off-by: Weston Andros Adamson <dros@netapp.com> Cc: stable@vger.kernel.org # 2.6.37+ Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2013-12-17 21:16:11 +04:00
if (rpcauth_uptodatecred(task)) {
task->tk_action = call_allocate;
sunrpc: Fix infinite loop in RPC state machine When a task enters call_refreshresult with status 0 from call_refresh and !rpcauth_uptodatecred(task) it enters call_refresh again with no rate-limiting or max number of retries. Instead of trying forever, make use of the retry path that other errors use. This only seems to be possible when the crrefresh callback is gss_refresh_null, which only happens when destroying the context. To reproduce: 1) mount with sec=krb5 (or sec=sys with krb5 negotiated for non FSID specific operations). 2) reboot - the client will be stuck and will need to be hard rebooted BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:2:46] Modules linked in: rpcsec_gss_krb5 nfsv4 nfs fscache ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw i2c_piix4 i2c_core e1000 parport_pc parport shpchp nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc autofs4 mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy irq event stamp: 195724 hardirqs last enabled at (195723): [<ffffffff814a925c>] restore_args+0x0/0x30 hardirqs last disabled at (195724): [<ffffffff814b0a6a>] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (195722): [<ffffffff8103f583>] __do_softirq+0x1df/0x276 softirqs last disabled at (195717): [<ffffffff8103f852>] irq_exit+0x53/0x9a CPU: 0 PID: 46 Comm: kworker/0:2 Not tainted 3.13.0-rc3-branch-dros_testing+ #4 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] task: ffff8800799c4260 ti: ffff880079002000 task.ti: ffff880079002000 RIP: 0010:[<ffffffffa0064fd4>] [<ffffffffa0064fd4>] __rpc_execute+0x8a/0x362 [sunrpc] RSP: 0018:ffff880079003d18 EFLAGS: 00000246 RAX: 0000000000000005 RBX: 0000000000000007 RCX: 0000000000000007 RDX: 0000000000000007 RSI: ffff88007aecbae8 RDI: ffff8800783d8900 RBP: ffff880079003d78 R08: ffff88006e30e9f8 R09: ffffffffa005a3d7 R10: ffff88006e30e7b0 R11: ffff8800783d8900 R12: ffffffffa006675e R13: ffff880079003ce8 R14: ffff88006e30e7b0 R15: ffff8800783d8900 FS: 0000000000000000(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3072333000 CR3: 0000000001a0b000 CR4: 00000000001407f0 Stack: ffff880079003d98 0000000000000246 0000000000000000 ffff88007a9a4830 ffff880000000000 ffffffff81073f47 ffff88007f212b00 ffff8800799c4260 ffff8800783d8988 ffff88007f212b00 ffffe8ffff604800 0000000000000000 Call Trace: [<ffffffff81073f47>] ? trace_hardirqs_on_caller+0x145/0x1a1 [<ffffffffa00652d3>] rpc_async_schedule+0x27/0x32 [sunrpc] [<ffffffff81052974>] process_one_work+0x211/0x3a5 [<ffffffff810528d5>] ? process_one_work+0x172/0x3a5 [<ffffffff81052eeb>] worker_thread+0x134/0x202 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff810584a0>] kthread+0xc9/0xd1 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 [<ffffffff814afd6c>] ret_from_fork+0x7c/0xb0 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 Code: e8 87 63 fd e0 c6 05 10 dd 01 00 01 48 8b 43 70 4c 8d 6b 70 45 31 e4 a8 02 0f 85 d5 02 00 00 4c 8b 7b 48 48 c7 43 48 00 00 00 00 <4c> 8b 4b 50 4d 85 ff 75 0c 4d 85 c9 4d 89 cf 0f 84 32 01 00 00 And the output of "rpcdebug -m rpc -s all": RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 Signed-off-by: Weston Andros Adamson <dros@netapp.com> Cc: stable@vger.kernel.org # 2.6.37+ Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2013-12-17 21:16:11 +04:00
return;
}
/* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred.
*/
fallthrough;
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
fallthrough;
case -EAGAIN:
status = -EACCES;
fallthrough;
case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
trace_rpc_retry_refresh_status(task);
return;
case -ENOMEM:
rpc_delay(task, HZ >> 4);
return;
}
trace_rpc_refresh_status(task);
rpc_call_rpcerror(task, status);
}
/*
* 2b. Allocate the buffer. For details, see sched.c:rpc_malloc.
* (Note: buffer memory is freed in xprt_release).
*/
static void
call_allocate(struct rpc_task *task)
{
const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
int status;
task->tk_status = 0;
task->tk_action = call_encode;
if (req->rq_buffer)
return;
if (proc->p_proc != 0) {
BUG_ON(proc->p_arglen == 0);
if (proc->p_decode != NULL)
BUG_ON(proc->p_replen == 0);
}
/*
* Calculate the size (in quads) of the RPC call
* and reply headers, and convert both values
* to byte sizes.
*/
req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) +
proc->p_arglen;
req->rq_callsize <<= 2;
/*
* Note: the reply buffer must at minimum allocate enough space
* for the 'struct accepted_reply' from RFC5531.
*/
req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + \
max_t(size_t, proc->p_replen, 2);
req->rq_rcvsize <<= 2;
status = xprt->ops->buf_alloc(task);
trace_rpc_buf_alloc(task, status);
if (status == 0)
return;
if (status != -ENOMEM) {
rpc_call_rpcerror(task, status);
return;
}
if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) {
task->tk_action = call_allocate;
rpc_delay(task, HZ>>4);
return;
}
rpc_call_rpcerror(task, -ERESTARTSYS);
}
static int
rpc_task_need_encode(struct rpc_task *task)
{
return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 &&
(!(task->tk_flags & RPC_TASK_SENT) ||
!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
xprt_request_need_retransmit(task));
}
static void
rpc_xdr_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct xdr_stream xdr;
xdr_buf_init(&req->rq_snd_buf,
req->rq_buffer,
req->rq_callsize);
xdr_buf_init(&req->rq_rcv_buf,
req->rq_rbuffer,
req->rq_rcvsize);
req->rq_reply_bytes_recvd = 0;
req->rq_snd_buf.head[0].iov_len = 0;
xdr_init_encode(&xdr, &req->rq_snd_buf,
req->rq_snd_buf.head[0].iov_base, req);
if (rpc_encode_header(task, &xdr))
return;
task->tk_status = rpcauth_wrap_req(task, &xdr);
}
/*
* 3. Encode arguments of an RPC call
*/
static void
call_encode(struct rpc_task *task)
{
if (!rpc_task_need_encode(task))
goto out;
/* Dequeue task from the receive queue while we're encoding */
xprt_request_dequeue_xprt(task);
/* Encode here so that rpcsec_gss can use correct sequence number. */
rpc_xdr_encode(task);
/* Add task to reply queue before transmission to avoid races */
if (task->tk_status == 0 && rpc_reply_expected(task))
task->tk_status = xprt_request_enqueue_receive(task);
/* Did the encode result in an error condition? */
if (task->tk_status != 0) {
/* Was the error nonfatal? */
switch (task->tk_status) {
case -EAGAIN:
case -ENOMEM:
rpc_delay(task, HZ >> 4);
break;
case -EKEYEXPIRED:
if (!task->tk_cred_retry) {
rpc_call_rpcerror(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
trace_rpc_retry_refresh_status(task);
}
break;
default:
rpc_call_rpcerror(task, task->tk_status);
}
return;
}
xprt_request_enqueue_transmit(task);
out:
task->tk_action = call_transmit;
/* Check that the connection is OK */
if (!xprt_bound(task->tk_xprt))
task->tk_action = call_bind;
else if (!xprt_connected(task->tk_xprt))
task->tk_action = call_connect;
}
/*
* Helpers to check if the task was already transmitted, and
* to take action when that is the case.
*/
static bool
rpc_task_transmitted(struct rpc_task *task)
{
return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
}
static void
rpc_task_handle_transmitted(struct rpc_task *task)
{
xprt_end_transmit(task);
task->tk_action = call_transmit_status;
}
/*
* 4. Get the server port number if not yet set
*/
static void
call_bind(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
if (rpc_task_transmitted(task)) {
rpc_task_handle_transmitted(task);
return;
}
if (xprt_bound(xprt)) {
task->tk_action = call_connect;
return;
}
task->tk_action = call_bind_status;
if (!xprt_prepare_transmit(task))
return;
xprt->ops->rpcbind(task);
}
/*
* 4a. Sort out bind result
*/
static void
call_bind_status(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
int status = -EIO;
if (rpc_task_transmitted(task)) {
rpc_task_handle_transmitted(task);
return;
}
if (task->tk_status >= 0)
goto out_next;
if (xprt_bound(xprt)) {
task->tk_status = 0;
goto out_next;
}
switch (task->tk_status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
goto retry_timeout;
case -EACCES:
trace_rpcb_prog_unavail_err(task);
/* fail immediately if this is an RPC ping */
if (task->tk_msg.rpc_proc->p_proc == 0) {
status = -EOPNOTSUPP;
break;
}
rpc_delay(task, 3*HZ);
goto retry_timeout;
case -ENOBUFS:
rpc_delay(task, HZ >> 2);
goto retry_timeout;
case -EAGAIN:
goto retry_timeout;
case -ETIMEDOUT:
trace_rpcb_timeout_err(task);
goto retry_timeout;
case -EPFNOSUPPORT:
/* server doesn't support any rpcbind version we know of */
trace_rpcb_bind_version_err(task);
break;
case -EPROTONOSUPPORT:
trace_rpcb_bind_version_err(task);
goto retry_timeout;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPIPE:
trace_rpcb_unreachable_err(task);
if (!RPC_IS_SOFTCONN(task)) {
rpc_delay(task, 5*HZ);
goto retry_timeout;
}
status = task->tk_status;
break;
default:
trace_rpcb_unrecognized_err(task);
}
rpc_call_rpcerror(task, status);
return;
out_next:
task->tk_action = call_connect;
return;
retry_timeout:
task->tk_status = 0;
task->tk_action = call_bind;
rpc_check_timeout(task);
}
/*
* 4b. Connect to the RPC server
*/
static void
call_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
if (rpc_task_transmitted(task)) {
rpc_task_handle_transmitted(task);
return;
}
if (xprt_connected(xprt)) {
task->tk_action = call_transmit;
return;
}
task->tk_action = call_connect_status;
if (task->tk_status < 0)
return;
if (task->tk_flags & RPC_TASK_NOCONNECT) {
rpc_call_rpcerror(task, -ENOTCONN);
return;
}
if (!xprt_prepare_transmit(task))
return;
xprt_connect(task);
}
/*
* 4c. Sort out connect result
*/
static void
call_connect_status(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
if (rpc_task_transmitted(task)) {
rpc_task_handle_transmitted(task);
return;
}
trace_rpc_connect_status(task);
if (task->tk_status == 0) {
clnt->cl_stats->netreconn++;
goto out_next;
}
if (xprt_connected(xprt)) {
task->tk_status = 0;
goto out_next;
}
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
case -ECONNRESET:
/* A positive refusal suggests a rebind is needed. */
if (RPC_IS_SOFTCONN(task))
break;
if (clnt->cl_autobind) {
rpc_force_rebind(clnt);
goto out_retry;
}
fallthrough;
case -ECONNABORTED:
case -ENETDOWN:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EPIPE:
case -EPROTO:
xprt_conditional_disconnect(task->tk_rqstp->rq_xprt,
task->tk_rqstp->rq_connect_cookie);
if (RPC_IS_SOFTCONN(task))
break;
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
fallthrough;
case -EADDRINUSE:
case -ENOTCONN:
case -EAGAIN:
case -ETIMEDOUT:
if (!(task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) &&
(task->tk_flags & RPC_TASK_MOVEABLE) &&
test_bit(XPRT_REMOVE, &xprt->state)) {
struct rpc_xprt *saved = task->tk_xprt;
struct rpc_xprt_switch *xps;
xps = rpc_clnt_xprt_switch_get(clnt);
if (xps->xps_nxprts > 1) {
long value;
xprt_release(task);
value = atomic_long_dec_return(&xprt->queuelen);
if (value == 0)
rpc_xprt_switch_remove_xprt(xps, saved,
true);
xprt_put(saved);
task->tk_xprt = NULL;
task->tk_action = call_start;
}
xprt_switch_put(xps);
if (!task->tk_xprt)
goto out;
}
goto out_retry;
case -ENOBUFS:
rpc_delay(task, HZ >> 2);
goto out_retry;
}
rpc_call_rpcerror(task, status);
return;
out_next:
task->tk_action = call_transmit;
return;
out_retry:
/* Check for timeouts before looping back to call_bind */
task->tk_action = call_bind;
out:
rpc_check_timeout(task);
}
/*
* 5. Transmit the RPC request, and wait for reply
*/
static void
call_transmit(struct rpc_task *task)
{
if (rpc_task_transmitted(task)) {
rpc_task_handle_transmitted(task);
return;
}
task->tk_action = call_transmit_status;
if (!xprt_prepare_transmit(task))
return;
task->tk_status = 0;
if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
if (!xprt_connected(task->tk_xprt)) {
task->tk_status = -ENOTCONN;
return;
}
xprt_transmit(task);
}
xprt_end_transmit(task);
}
/*
* 5a. Handle cleanup after a transmission
*/
static void
call_transmit_status(struct rpc_task *task)
{
task->tk_action = call_status;
/*
* Common case: success. Force the compiler to put this
* test first.
*/
if (rpc_task_transmitted(task)) {
task->tk_status = 0;
xprt_request_wait_receive(task);
return;
}
switch (task->tk_status) {
default:
break;
case -EBADMSG:
task->tk_status = 0;
task->tk_action = call_encode;
break;
/*
* Special cases: if we've been waiting on the
* socket's write_space() callback, or if the
* socket just returned a connection error,
* then hold onto the transport lock.
*/
case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
case -EBADSLT:
case -EAGAIN:
task->tk_action = call_transmit;
task->tk_status = 0;
break;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
rpc: Add -EPERM processing for xs_udp_send_request() If an iptables drop rule is added for an nfs server, the client can end up in a softlockup. Because of the way that xs_sendpages() is structured, the -EPERM is ignored since the prior bits of the packet may have been successfully queued and thus xs_sendpages() returns a non-zero value. Then, xs_udp_send_request() thinks that because some bits were queued it should return -EAGAIN. We then try the request again and again, resulting in cpu spinning. Reproducer: 1) open a file on the nfs server '/nfs/foo' (mounted using udp) 2) iptables -A OUTPUT -d <nfs server ip> -j DROP 3) write to /nfs/foo 4) close /nfs/foo 5) iptables -D OUTPUT -d <nfs server ip> -j DROP The softlockup occurs in step 4 above. The previous patch, allows xs_sendpages() to return both a sent count and any error values that may have occurred. Thus, if we get an -EPERM, return that to the higher level code. With this patch in place we can successfully abort the above sequence and avoid the softlockup. I also tried the above test case on an nfs mount on tcp and although the system does not softlockup, I still ended up with the 'hung_task' firing after 120 seconds, due to the i/o being stuck. The tcp case appears a bit harder to fix, since -EPERM appears to get ignored much lower down in the stack and does not propogate up to xs_sendpages(). This case is not quite as insidious as the softlockup and it is not addressed here. Reported-by: Yigong Lou <ylou@akamai.com> Signed-off-by: Jason Baron <jbaron@akamai.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-09-24 22:08:04 +04:00
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
task->tk_status);
rpc_call_rpcerror(task, task->tk_status);
return;
}
fallthrough;
case -ECONNRESET:
case -ECONNABORTED:
case -EADDRINUSE:
case -ENOTCONN:
case -EPIPE:
task->tk_action = call_bind;
task->tk_status = 0;
break;
}
rpc_check_timeout(task);
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static void call_bc_transmit(struct rpc_task *task);
static void call_bc_transmit_status(struct rpc_task *task);
static void
call_bc_encode(struct rpc_task *task)
{
xprt_request_enqueue_transmit(task);
task->tk_action = call_bc_transmit;
}
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
/*
* 5b. Send the backchannel RPC reply. On error, drop the reply. In
* addition, disconnect on connectivity errors.
*/
static void
call_bc_transmit(struct rpc_task *task)
{
task->tk_action = call_bc_transmit_status;
if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
if (!xprt_prepare_transmit(task))
return;
task->tk_status = 0;
xprt_transmit(task);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
}
xprt_end_transmit(task);
}
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
static void
call_bc_transmit_status(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
if (rpc_task_transmitted(task))
task->tk_status = 0;
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
switch (task->tk_status) {
case 0:
/* Success */
case -ENETDOWN:
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -ECONNRESET:
case -ECONNREFUSED:
case -EADDRINUSE:
case -ENOTCONN:
case -EPIPE:
break;
case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
case -EBADSLT:
case -EAGAIN:
task->tk_status = 0;
task->tk_action = call_bc_transmit;
return;
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
* forechannel reestablish the connection. The server will
* have to retransmit the backchannel request and we'll
* reprocess it. Since these ops are idempotent, there's no
* need to cache our reply at this time.
*/
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
xprt_conditional_disconnect(req->rq_xprt,
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
req->rq_connect_cookie);
break;
default:
/*
* We were unable to reply and will have to drop the
* request. The server should reconnect and retransmit.
*/
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
break;
}
task->tk_action = rpc_exit_task;
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:23:03 +04:00
/*
* 6. Sort out the RPC call status
*/
static void
call_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
int status;
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt, task->tk_status);
status = task->tk_status;
if (status >= 0) {
task->tk_action = call_decode;
return;
}
trace_rpc_call_status(task);
task->tk_status = 0;
switch(status) {
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
rpc: Add -EPERM processing for xs_udp_send_request() If an iptables drop rule is added for an nfs server, the client can end up in a softlockup. Because of the way that xs_sendpages() is structured, the -EPERM is ignored since the prior bits of the packet may have been successfully queued and thus xs_sendpages() returns a non-zero value. Then, xs_udp_send_request() thinks that because some bits were queued it should return -EAGAIN. We then try the request again and again, resulting in cpu spinning. Reproducer: 1) open a file on the nfs server '/nfs/foo' (mounted using udp) 2) iptables -A OUTPUT -d <nfs server ip> -j DROP 3) write to /nfs/foo 4) close /nfs/foo 5) iptables -D OUTPUT -d <nfs server ip> -j DROP The softlockup occurs in step 4 above. The previous patch, allows xs_sendpages() to return both a sent count and any error values that may have occurred. Thus, if we get an -EPERM, return that to the higher level code. With this patch in place we can successfully abort the above sequence and avoid the softlockup. I also tried the above test case on an nfs mount on tcp and although the system does not softlockup, I still ended up with the 'hung_task' firing after 120 seconds, due to the i/o being stuck. The tcp case appears a bit harder to fix, since -EPERM appears to get ignored much lower down in the stack and does not propogate up to xs_sendpages(). This case is not quite as insidious as the softlockup and it is not addressed here. Reported-by: Yigong Lou <ylou@akamai.com> Signed-off-by: Jason Baron <jbaron@akamai.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-09-24 22:08:04 +04:00
case -EPERM:
if (RPC_IS_SOFTCONN(task))
goto out_exit;
/*
* Delay any retries for 3 seconds, then handle as if it
* were a timeout.
*/
rpc_delay(task, 3*HZ);
fallthrough;
case -ETIMEDOUT:
break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
rpc_force_rebind(clnt);
break;
case -EADDRINUSE:
rpc_delay(task, 3*HZ);
fallthrough;
case -EPIPE:
case -EAGAIN:
break;
case -ENFILE:
case -ENOBUFS:
case -ENOMEM:
rpc_delay(task, HZ>>2);
break;
case -EIO:
/* shutdown or soft timeout */
goto out_exit;
default:
if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n",
clnt->cl_program->name, -status);
goto out_exit;
}
task->tk_action = call_encode;
rpc_check_timeout(task);
return;
out_exit:
rpc_call_rpcerror(task, status);
}
static bool
rpc_check_connected(const struct rpc_rqst *req)
{
/* No allocated request or transport? return true */
if (!req || !req->rq_xprt)
return true;
return xprt_connected(req->rq_xprt);
}
static void
rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
if (RPC_SIGNALLED(task))
return;
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
trace_rpc_timeout_status(task);
task->tk_timeouts++;
if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
rpc_call_rpcerror(task, -ETIMEDOUT);
return;
}
if (RPC_IS_SOFT(task)) {
/*
* Once a "no retrans timeout" soft tasks (a.k.a NFSv4) has
* been sent, it should time out only if the transport
* connection gets terminally broken.
*/
if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
rpc_check_connected(task->tk_rqstp))
return;
if (clnt->cl_chatty) {
pr_notice_ratelimited(
"%s: server %s not responding, timed out\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
if (task->tk_flags & RPC_TASK_TIMEOUT)
rpc_call_rpcerror(task, -ETIMEDOUT);
else
__rpc_call_rpcerror(task, -EIO, -ETIMEDOUT);
return;
}
if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
if (clnt->cl_chatty) {
pr_notice_ratelimited(
"%s: server %s not responding, still trying\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
}
rpc_force_rebind(clnt);
/*
* Did our request time out due to an RPCSEC_GSS out-of-sequence
* event? RFC2203 requires the server to drop all such requests.
*/
rpcauth_invalcred(task);
}
/*
* 7. Decode the RPC reply
*/
static void
call_decode(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
struct xdr_stream xdr;
int err;
if (!task->tk_msg.rpc_proc->p_decode) {
task->tk_action = rpc_exit_task;
return;
}
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty) {
pr_notice_ratelimited("%s: server %s OK\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
task->tk_flags &= ~RPC_CALL_MAJORSEEN;
}
/*
* Did we ever call xprt_complete_rqst()? If not, we should assume
* the message is incomplete.
*/
err = -EAGAIN;
if (!req->rq_reply_bytes_recvd)
goto out;
/* Ensure that we see all writes made by xprt_complete_rqst()
* before it changed req->rq_reply_bytes_recvd.
*/
smp_rmb();
req->rq_rcv_buf.len = req->rq_private_buf.len;
trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
/* Check that the softirq receive buffer is valid */
WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
sizeof(req->rq_rcv_buf)) != 0);
xdr_init_decode(&xdr, &req->rq_rcv_buf,
req->rq_rcv_buf.head[0].iov_base, req);
err = rpc_decode_header(task, &xdr);
out:
switch (err) {
case 0:
task->tk_action = rpc_exit_task;
task->tk_status = rpcauth_unwrap_resp(task, &xdr);
xdr_finish_decode(&xdr);
return;
case -EAGAIN:
task->tk_status = 0;
if (task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt,
req->rq_connect_cookie);
task->tk_action = call_encode;
rpc_check_timeout(task);
break;
case -EKEYREJECTED:
task->tk_action = call_reserve;
rpc_check_timeout(task);
rpcauth_invalcred(task);
/* Ensure we obtain a new XID if we retry! */
xprt_release(task);
}
}
static int
rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
__be32 *p;
int error;
error = -EMSGSIZE;
p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2);
if (!p)
goto out_fail;
*p++ = req->rq_xid;
*p++ = rpc_call;
*p++ = cpu_to_be32(RPC_VERSION);
*p++ = cpu_to_be32(clnt->cl_prog);
*p++ = cpu_to_be32(clnt->cl_vers);
*p = cpu_to_be32(task->tk_msg.rpc_proc->p_proc);
error = rpcauth_marshcred(task, xdr);
if (error < 0)
goto out_fail;
return 0;
out_fail:
trace_rpc_bad_callhdr(task);
rpc_call_rpcerror(task, error);
return error;
}
static noinline int
rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
int error;
__be32 *p;
/* RFC-1014 says that the representation of XDR data must be a
* multiple of four bytes
* - if it isn't pointer subtraction in the NFS client may give
* undefined results
*/
if (task->tk_rqstp->rq_rcv_buf.len & 3)
goto out_unparsable;
p = xdr_inline_decode(xdr, 3 * sizeof(*p));
if (!p)
goto out_unparsable;
p++; /* skip XID */
if (*p++ != rpc_reply)
goto out_unparsable;
if (*p++ != rpc_msg_accepted)
goto out_msg_denied;
nfs41: Process the RPC call direction Reading and storing the RPC direction is a three step process. 1. xs_tcp_read_calldir() reads the RPC direction, but it will not store it in the XDR buffer since the 'struct rpc_rqst' is not yet available. 2. The 'struct rpc_rqst' is obtained during the TCP_RCV_COPY_DATA state. This state need not necessarily be preceeded by the TCP_RCV_READ_CALLDIR. For example, we may be reading a continuation packet to a large reply. Therefore, we can't simply obtain the 'struct rpc_rqst' during the TCP_RCV_READ_CALLDIR state and assume it's available during TCP_RCV_COPY_DATA. This patch adds a new TCP_RCV_READ_CALLDIR flag to indicate the need to read the RPC direction. It then uses TCP_RCV_COPY_CALLDIR to indicate the RPC direction needs to be saved after the 'struct rpc_rqst' has been allocated. 3. The 'struct rpc_rqst' is obtained by the xs_tcp_read_data() helper functions. xs_tcp_read_common() then saves the RPC direction in the XDR buffer if TCP_RCV_COPY_CALLDIR is set. This will happen when we're reading the data immediately after the direction was read. xs_tcp_read_common() then clears this flag. [was nfs41: Skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Add RPC direction back into the XDR buffer] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Don't skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 17:22:54 +04:00
error = rpcauth_checkverf(task, xdr);
SUNRPC: fix handling expired GSS context In the case where we have received a successful reply to an RPC request, but while processing the reply the client in rpc_decode_header() finds an expired context, the code ends up propagating the error to the caller instead of getting a new context and retrying the request. To give more details, in rpc_decode_header() we call rpcauth_checkverf() will call into the gss and internally will at some point call gss_validate() which has a check if the current’s context lifetime expired, and it would fail. The reason for the failure gets ‘scrubbed’ and translated to EACCES so when we get back to rpc_decode_header() we just go to “out_verifier” which for that error would get converted to “out_garbage” (ie it’s treated as garballed reply) and the next action is call_encode. Which (1) doesn’t reencode or re-send (not to mention no upcall happens because context expires as that reason just not known) and it again fails in the same decoding process. After re-trying it 3 times the error is propagated back to the caller (ie nfs4_write_done_cb() in the case a failing write). To fix this, instead we need to look to the case where the server decides that context has expired and replies with an RPC auth error. In that case, the rpc_decode_header() goes to "out_msg_denied" in that we return EKEYREJECTED which in call_decode() is sent to “call_reserve” which triggers an upcalls and a re-try of the operation. The proposed fix is in case of a failed rpc_decode_header() to check if credentials were set to be invalid and use that as a proxy for deciding that context has expired and then treat is same way as receiving an auth error. Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Reviewed-by: Benjamin Coddington <bcodding@redhat.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2024-04-18 17:11:04 +03:00
if (error) {
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
rpcauth_invalcred(task);
if (!task->tk_cred_retry)
goto out_err;
task->tk_cred_retry--;
trace_rpc__stale_creds(task);
return -EKEYREJECTED;
}
goto out_verifier;
SUNRPC: fix handling expired GSS context In the case where we have received a successful reply to an RPC request, but while processing the reply the client in rpc_decode_header() finds an expired context, the code ends up propagating the error to the caller instead of getting a new context and retrying the request. To give more details, in rpc_decode_header() we call rpcauth_checkverf() will call into the gss and internally will at some point call gss_validate() which has a check if the current’s context lifetime expired, and it would fail. The reason for the failure gets ‘scrubbed’ and translated to EACCES so when we get back to rpc_decode_header() we just go to “out_verifier” which for that error would get converted to “out_garbage” (ie it’s treated as garballed reply) and the next action is call_encode. Which (1) doesn’t reencode or re-send (not to mention no upcall happens because context expires as that reason just not known) and it again fails in the same decoding process. After re-trying it 3 times the error is propagated back to the caller (ie nfs4_write_done_cb() in the case a failing write). To fix this, instead we need to look to the case where the server decides that context has expired and replies with an RPC auth error. In that case, the rpc_decode_header() goes to "out_msg_denied" in that we return EKEYREJECTED which in call_decode() is sent to “call_reserve” which triggers an upcalls and a re-try of the operation. The proposed fix is in case of a failed rpc_decode_header() to check if credentials were set to be invalid and use that as a proxy for deciding that context has expired and then treat is same way as receiving an auth error. Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Reviewed-by: Benjamin Coddington <bcodding@redhat.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2024-04-18 17:11:04 +03:00
}
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
goto out_unparsable;
switch (*p) {
case rpc_success:
return 0;
case rpc_prog_unavail:
trace_rpc__prog_unavail(task);
error = -EPFNOSUPPORT;
goto out_err;
case rpc_prog_mismatch:
trace_rpc__prog_mismatch(task);
error = -EPROTONOSUPPORT;
goto out_err;
case rpc_proc_unavail:
trace_rpc__proc_unavail(task);
error = -EOPNOTSUPP;
goto out_err;
case rpc_garbage_args:
case rpc_system_err:
trace_rpc__garbage_args(task);
error = -EIO;
break;
default:
goto out_unparsable;
}
out_garbage:
clnt->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
task->tk_action = call_encode;
return -EAGAIN;
}
out_err:
rpc_call_rpcerror(task, error);
return error;
out_unparsable:
trace_rpc__unparsable(task);
error = -EIO;
goto out_garbage;
out_verifier:
trace_rpc_bad_verifier(task);
switch (error) {
case -EPROTONOSUPPORT:
goto out_err;
case -EACCES:
/* Re-encode with a fresh cred */
fallthrough;
default:
goto out_garbage;
}
out_msg_denied:
error = -EACCES;
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
goto out_unparsable;
switch (*p++) {
case rpc_auth_error:
break;
case rpc_mismatch:
trace_rpc__mismatch(task);
error = -EPROTONOSUPPORT;
goto out_err;
default:
goto out_unparsable;
}
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
goto out_unparsable;
switch (*p++) {
case rpc_autherr_rejectedcred:
case rpc_autherr_rejectedverf:
case rpcsec_gsserr_credproblem:
case rpcsec_gsserr_ctxproblem:
rpcauth_invalcred(task);
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
trace_rpc__stale_creds(task);
return -EKEYREJECTED;
case rpc_autherr_badcred:
case rpc_autherr_badverf:
/* possibly garbled cred/verf? */
if (!task->tk_garb_retry)
break;
task->tk_garb_retry--;
trace_rpc__bad_creds(task);
task->tk_action = call_encode;
return -EAGAIN;
case rpc_autherr_tooweak:
trace_rpc__auth_tooweak(task);
pr_warn("RPC: server %s requires stronger authentication.\n",
task->tk_xprt->servername);
break;
default:
goto out_unparsable;
}
goto out_err;
}
static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
const void *obj)
{
}
static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
void *obj)
{
return 0;
}
static const struct rpc_procinfo rpcproc_null = {
.p_encode = rpcproc_encode_null,
.p_decode = rpcproc_decode_null,
};
static const struct rpc_procinfo rpcproc_null_noreply = {
.p_encode = rpcproc_encode_null,
};
static void
rpc_null_call_prepare(struct rpc_task *task, void *data)
{
task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
rpc_call_start(task);
}
static const struct rpc_call_ops rpc_null_ops = {
.rpc_call_prepare = rpc_null_call_prepare,
.rpc_call_done = rpc_default_callback,
};
static
struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
const struct rpc_call_ops *ops, void *data)
{
struct rpc_message msg = {
.rpc_proc = &rpcproc_null,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_xprt = xprt,
.rpc_message = &msg,
.rpc_op_cred = cred,
.callback_ops = ops ?: &rpc_null_ops,
.callback_data = data,
.flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
RPC_TASK_NULLCREDS,
};
return rpc_run_task(&task_setup_data);
}
struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
{
return rpc_call_null_helper(clnt, NULL, cred, flags, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rpc_call_null);
static int rpc_ping(struct rpc_clnt *clnt)
{
struct rpc_task *task;
int status;
if (clnt->cl_auth->au_ops->ping)
return clnt->cl_auth->au_ops->ping(clnt);
task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
return status;
}
static int rpc_ping_noreply(struct rpc_clnt *clnt)
{
struct rpc_message msg = {
.rpc_proc = &rpcproc_null_noreply,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_message = &msg,
.callback_ops = &rpc_null_ops,
.flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
};
struct rpc_task *task;
int status;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
return status;
}
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
};
static void rpc_cb_add_xprt_done(struct rpc_task *task, void *calldata)
{
struct rpc_cb_add_xprt_calldata *data = calldata;
if (task->tk_status == 0)
rpc_xprt_switch_add_xprt(data->xps, data->xprt);
}
static void rpc_cb_add_xprt_release(void *calldata)
{
struct rpc_cb_add_xprt_calldata *data = calldata;
xprt_put(data->xprt);
xprt_switch_put(data->xps);
kfree(data);
}
static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
.rpc_call_prepare = rpc_null_call_prepare,
.rpc_call_done = rpc_cb_add_xprt_done,
.rpc_release = rpc_cb_add_xprt_release,
};
/**
* rpc_clnt_test_and_add_xprt - Test and add a new transport to a rpc_clnt
* @clnt: pointer to struct rpc_clnt
* @xps: pointer to struct rpc_xprt_switch,
* @xprt: pointer struct rpc_xprt
NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
2023-08-30 22:29:34 +03:00
* @in_max_connect: pointer to the max_connect value for the passed in xprt transport
*/
int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
2023-08-30 22:29:34 +03:00
void *in_max_connect)
{
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
2023-08-30 22:29:34 +03:00
int max_connect = clnt->cl_max_connect;
NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
2023-08-30 22:29:34 +03:00
if (in_max_connect)
max_connect = *(int *)in_max_connect;
if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
rcu_read_lock();
pr_warn("SUNRPC: reached max allowed number (%d) did not add "
NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
2023-08-30 22:29:34 +03:00
"transport to server: %s\n", max_connect,
rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
rcu_read_unlock();
return -EINVAL;
}
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->xps = xprt_switch_get(xps);
data->xprt = xprt_get(xprt);
if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) {
rpc_cb_add_xprt_release(data);
goto success;
}
task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
if (IS_ERR(task))
return PTR_ERR(task);
data->xps->xps_nunique_destaddr_xprts++;
rpc_put_task(task);
success:
return 1;
}
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
static int rpc_clnt_add_xprt_helper(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct rpc_add_xprt_test *data)
{
struct rpc_task *task;
int status = -EADDRINUSE;
/* Test the connection */
task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
if (status < 0)
return status;
/* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
data->add_xprt_test(clnt, xprt, data->data);
return 0;
}
/**
* rpc_clnt_setup_test_and_add_xprt()
*
* This is an rpc_clnt_add_xprt setup() function which returns 1 so:
* 1) caller of the test function must dereference the rpc_xprt_switch
* and the rpc_xprt.
* 2) test function must call rpc_xprt_switch_add_xprt, usually in
* the rpc_call_done routine.
*
* Upon success (return of 1), the test function adds the new
* transport to the rpc_clnt xprt switch
*
* @clnt: struct rpc_clnt to get the new transport
* @xps: the rpc_xprt_switch to hold the new transport
* @xprt: the rpc_xprt to test
* @data: a struct rpc_add_xprt_test pointer that holds the test function
* and test function call data
*/
int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps,
struct rpc_xprt *xprt,
void *data)
{
int status = -EADDRINUSE;
xprt = xprt_get(xprt);
xprt_switch_get(xps);
if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
goto out_err;
status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
if (status < 0)
goto out_err;
status = 1;
out_err:
xprt_put(xprt);
xprt_switch_put(xps);
if (status < 0)
pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not "
"added\n", status,
xprt->address_strings[RPC_DISPLAY_ADDR]);
/* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
return status;
}
EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt);
/**
* rpc_clnt_add_xprt - Add a new transport to a rpc_clnt
* @clnt: pointer to struct rpc_clnt
* @xprtargs: pointer to struct xprt_create
* @setup: callback to test and/or set up the connection
* @data: pointer to setup function data
*
* Creates a new transport using the parameters set in args and
* adds it to clnt.
* If ping is set, then test that connectivity succeeds before
* adding the new transport.
*
*/
int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
struct xprt_create *xprtargs,
int (*setup)(struct rpc_clnt *,
struct rpc_xprt_switch *,
struct rpc_xprt *,
void *),
void *data)
{
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport, reuseport;
int ret = 0, ident;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
xprt = xprt_iter_xprt(&clnt->cl_xpi);
if (xps == NULL || xprt == NULL) {
rcu_read_unlock();
xprt_switch_put(xps);
return -EAGAIN;
}
resvport = xprt->resvport;
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
ident = xprt->xprt_class->ident;
rcu_read_unlock();
if (!xprtargs->ident)
xprtargs->ident = ident;
xprtargs->xprtsec = clnt->cl_xprtsec;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
goto out_put_switch;
}
xprt->resvport = resvport;
xprt->reuseport = reuseport;
if (xprtargs->connect_timeout)
connect_timeout = xprtargs->connect_timeout;
if (xprtargs->reconnect_timeout)
reconnect_timeout = xprtargs->reconnect_timeout;
if (xprt->ops->set_connect_timeout != NULL)
xprt->ops->set_connect_timeout(xprt,
connect_timeout,
reconnect_timeout);
rpc_xprt_switch_set_roundrobin(xps);
if (setup) {
ret = setup(clnt, xps, xprt, data);
if (ret != 0)
goto out_put_xprt;
}
rpc_xprt_switch_add_xprt(xps, xprt);
out_put_xprt:
xprt_put(xprt);
out_put_switch:
xprt_switch_put(xps);
return ret;
}
EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct rpc_add_xprt_test *data)
{
struct rpc_xprt *main_xprt;
int status = 0;
xprt_get(xprt);
rcu_read_lock();
main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
(struct sockaddr *)&main_xprt->addr);
rcu_read_unlock();
xprt_put(main_xprt);
if (status || !test_bit(XPRT_OFFLINE, &xprt->state))
goto out;
status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
out:
xprt_put(xprt);
return status;
}
/* rpc_clnt_probe_trunked_xprt -- probe offlined transport for session trunking
* @clnt rpc_clnt structure
*
* For each offlined transport found in the rpc_clnt structure call
* the function rpc_xprt_probe_trunked() which will determine if this
* transport still belongs to the trunking group.
*/
void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *clnt,
struct rpc_add_xprt_test *data)
{
struct rpc_xprt_iter xpi;
int ret;
ret = rpc_clnt_xprt_iter_offline_init(clnt, &xpi);
if (ret)
return;
for (;;) {
struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
if (!xprt)
break;
ret = rpc_xprt_probe_trunked(clnt, xprt, data);
xprt_put(xprt);
if (ret < 0)
break;
xprt_iter_rewind(&xpi);
}
xprt_iter_destroy(&xpi);
}
EXPORT_SYMBOL_GPL(rpc_clnt_probe_trunked_xprts);
static int rpc_xprt_offline(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *data)
{
struct rpc_xprt *main_xprt;
struct rpc_xprt_switch *xps;
int err = 0;
xprt_get(xprt);
rcu_read_lock();
main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
err = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
(struct sockaddr *)&main_xprt->addr);
rcu_read_unlock();
xprt_put(main_xprt);
if (err)
goto out;
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
err = -EINTR;
goto out;
}
xprt_set_offline_locked(xprt, xps);
xprt_release_write(xprt, NULL);
out:
xprt_put(xprt);
xprt_switch_put(xps);
return err;
}
/* rpc_clnt_manage_trunked_xprts -- offline trunked transports
* @clnt rpc_clnt structure
*
* For each active transport found in the rpc_clnt structure call
* the function rpc_xprt_offline() which will identify trunked transports
* and will mark them offline.
*/
void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *clnt)
{
rpc_clnt_iterate_for_each_xprt(clnt, rpc_xprt_offline, NULL);
}
EXPORT_SYMBOL_GPL(rpc_clnt_manage_trunked_xprts);
struct connect_timeout_data {
unsigned long connect_timeout;
unsigned long reconnect_timeout;
};
static int
rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *data)
{
struct connect_timeout_data *timeo = data;
if (xprt->ops->set_connect_timeout)
xprt->ops->set_connect_timeout(xprt,
timeo->connect_timeout,
timeo->reconnect_timeout);
return 0;
}
void
rpc_set_connect_timeout(struct rpc_clnt *clnt,
unsigned long connect_timeout,
unsigned long reconnect_timeout)
{
struct connect_timeout_data timeout = {
.connect_timeout = connect_timeout,
.reconnect_timeout = reconnect_timeout,
};
rpc_clnt_iterate_for_each_xprt(clnt,
rpc_xprt_set_connect_timeout,
&timeout);
}
EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
xps = rpc_clnt_xprt_switch_get(clnt);
xprt_set_online_locked(xprt, xps);
xprt_switch_put(xps);
}
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
if (rpc_clnt_xprt_switch_has_addr(clnt,
(const struct sockaddr *)&xprt->addr)) {
return rpc_clnt_xprt_set_online(clnt, xprt);
}
xps = rpc_clnt_xprt_switch_get(clnt);
rpc_xprt_switch_add_xprt(xps, xprt);
xprt_switch_put(xps);
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
rcu_read_lock();
xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
rpc_xprt_switch_remove_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
xprt, 0);
xps->xps_nunique_destaddr_xprts--;
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_remove_xprt);
bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
const struct sockaddr *sap)
{
struct rpc_xprt_switch *xps;
bool ret;
rcu_read_lock();
xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
ret = rpc_xprt_switch_has_addr(xps, sap);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static void rpc_show_header(void)
{
SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-05-22 01:09:41 +04:00
printk(KERN_INFO "-pid- flgs status -client- --rqstp- "
"-timeout ---ops--\n");
}
static void rpc_show_task(const struct rpc_clnt *clnt,
const struct rpc_task *task)
{
const char *rpc_waitq = "none";
if (RPC_IS_QUEUED(task))
rpc_waitq = rpc_qname(task->tk_waitqueue);
printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-05-22 01:09:41 +04:00
task->tk_pid, task->tk_flags, task->tk_status,
clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops,
clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
}
void rpc_show_tasks(struct net *net)
{
struct rpc_clnt *clnt;
struct rpc_task *task;
int header = 0;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
spin_lock(&sn->rpc_client_lock);
list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
spin_lock(&clnt->cl_lock);
list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
if (!header) {
rpc_show_header();
header++;
}
rpc_show_task(clnt, task);
}
spin_unlock(&clnt->cl_lock);
}
spin_unlock(&sn->rpc_client_lock);
}
#endif
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
static int
rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *dummy)
{
return xprt_enable_swap(xprt);
}
int
rpc_clnt_swap_activate(struct rpc_clnt *clnt)
{
while (clnt != clnt->cl_parent)
clnt = clnt->cl_parent;
if (atomic_inc_return(&clnt->cl_swapper) == 1)
return rpc_clnt_iterate_for_each_xprt(clnt,
rpc_clnt_swap_activate_callback, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
static int
rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *dummy)
{
xprt_disable_swap(xprt);
return 0;
}
void
rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
{
while (clnt != clnt->cl_parent)
clnt = clnt->cl_parent;
if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
rpc_clnt_iterate_for_each_xprt(clnt,
rpc_clnt_swap_deactivate_callback, NULL);
}
EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
#endif /* CONFIG_SUNRPC_SWAP */