SUNRPC: Count ops completing with tk_status < 0
We often see various error conditions with NFS4.x that show up with a very high operation count all completing with tk_status < 0 in a short period of time. Add a count to rpc_iostats to record on a per-op basis the ops that complete in this manner, which will enable lower overhead diagnostics. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
parent
93ba048e1b
commit
a332518fda
@ -30,7 +30,7 @@
|
|||||||
#include <linux/ktime.h>
|
#include <linux/ktime.h>
|
||||||
#include <linux/spinlock.h>
|
#include <linux/spinlock.h>
|
||||||
|
|
||||||
#define RPC_IOSTATS_VERS "1.0"
|
#define RPC_IOSTATS_VERS "1.1"
|
||||||
|
|
||||||
struct rpc_iostats {
|
struct rpc_iostats {
|
||||||
spinlock_t om_lock;
|
spinlock_t om_lock;
|
||||||
@ -66,6 +66,11 @@ struct rpc_iostats {
|
|||||||
ktime_t om_queue, /* queued for xmit */
|
ktime_t om_queue, /* queued for xmit */
|
||||||
om_rtt, /* RPC RTT */
|
om_rtt, /* RPC RTT */
|
||||||
om_execute; /* RPC execution */
|
om_execute; /* RPC execution */
|
||||||
|
/*
|
||||||
|
* The count of operations that complete with tk_status < 0.
|
||||||
|
* These statuses usually indicate error conditions.
|
||||||
|
*/
|
||||||
|
unsigned long om_error_status;
|
||||||
} ____cacheline_aligned;
|
} ____cacheline_aligned;
|
||||||
|
|
||||||
struct rpc_task;
|
struct rpc_task;
|
||||||
|
@ -177,6 +177,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
|
|||||||
|
|
||||||
execute = ktime_sub(now, task->tk_start);
|
execute = ktime_sub(now, task->tk_start);
|
||||||
op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
|
op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
|
||||||
|
if (task->tk_status < 0)
|
||||||
|
op_metrics->om_error_status++;
|
||||||
|
|
||||||
spin_unlock(&op_metrics->om_lock);
|
spin_unlock(&op_metrics->om_lock);
|
||||||
|
|
||||||
@ -219,13 +221,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b)
|
|||||||
a->om_queue = ktime_add(a->om_queue, b->om_queue);
|
a->om_queue = ktime_add(a->om_queue, b->om_queue);
|
||||||
a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
|
a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
|
||||||
a->om_execute = ktime_add(a->om_execute, b->om_execute);
|
a->om_execute = ktime_add(a->om_execute, b->om_execute);
|
||||||
|
a->om_error_status += b->om_error_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
|
static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
|
||||||
int op, const struct rpc_procinfo *procs)
|
int op, const struct rpc_procinfo *procs)
|
||||||
{
|
{
|
||||||
_print_name(seq, op, procs);
|
_print_name(seq, op, procs);
|
||||||
seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n",
|
seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n",
|
||||||
stats->om_ops,
|
stats->om_ops,
|
||||||
stats->om_ntrans,
|
stats->om_ntrans,
|
||||||
stats->om_timeouts,
|
stats->om_timeouts,
|
||||||
@ -233,7 +236,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
|
|||||||
stats->om_bytes_recv,
|
stats->om_bytes_recv,
|
||||||
ktime_to_ms(stats->om_queue),
|
ktime_to_ms(stats->om_queue),
|
||||||
ktime_to_ms(stats->om_rtt),
|
ktime_to_ms(stats->om_rtt),
|
||||||
ktime_to_ms(stats->om_execute));
|
ktime_to_ms(stats->om_execute),
|
||||||
|
stats->om_error_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
|
void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user