rcu-tasks: Print pre-stall-warning informational messages

RCU-tasks stall-warning messages are printed after the grace period is ten
minutes old.  Unfortunately, most of us will have rebooted the system in
response to an apparently-hung command long before the ten minutes is up,
and will thus see what looks to be a silent hang.

This commit therefore adds pr_info() messages that are printed earlier.
These should avoid being classified as errors, but should give impatient
users a hint.  These are controlled by new rcupdate.rcu_task_stall_info
and rcupdate.rcu_task_stall_info_mult kernel-boot parameters.  The former
defines the initial delay in jiffies (defaulting to 10 seconds) and the
latter defines the multiplier (defaulting to 3).  Thus, by default, the
first message will appear 10 seconds into the RCU-tasks grace period,
the second 40 seconds in, and the third 160 seconds in.  There would be
a fourth at 640 seconds in, but the stall warning message appears 600
seconds in, and once a stall warning is printed for a given grace period,
no further informational messages are printed.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
Paul E. McKenney 2022-02-25 16:01:12 -08:00
parent f75fd4b922
commit f25390033f
2 changed files with 62 additions and 8 deletions

View File

@ -4955,10 +4955,34 @@
number avoids disturbing real-time workloads, number avoids disturbing real-time workloads,
but lengthens grace periods. but lengthens grace periods.
rcupdate.rcu_task_stall_info= [KNL]
Set initial timeout in jiffies for RCU task stall
informational messages, which give some indication
of the problem for those not patient enough to
wait for ten minutes. Informational messages are
only printed prior to the stall-warning message
for a given grace period. Disable with a value
less than or equal to zero. Defaults to ten
seconds. A change in value does not take effect
until the beginning of the next grace period.
rcupdate.rcu_task_stall_info_mult= [KNL]
Multiplier for time interval between successive
RCU task stall informational messages for a given
RCU tasks grace period. This value is clamped
to one through ten, inclusive. It defaults to
the value three, so that the first informational
message is printed 10 seconds into the grace
period, the second at 40 seconds, the third at
160 seconds, and then the stall warning at 600
seconds would prevent a fourth at 640 seconds.
rcupdate.rcu_task_stall_timeout= [KNL] rcupdate.rcu_task_stall_timeout= [KNL]
Set timeout in jiffies for RCU task stall warning Set timeout in jiffies for RCU task stall
messages. Disable with a value less than or equal warning messages. Disable with a value less
to zero. than or equal to zero. Defaults to ten minutes.
A change in value does not take effect until
the beginning of the next grace period.
rcupdate.rcu_self_test= [KNL] rcupdate.rcu_self_test= [KNL]
Run the RCU early boot self tests Run the RCU early boot self tests

View File

@ -143,6 +143,11 @@ module_param(rcu_task_ipi_delay, int, 0644);
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644); module_param(rcu_task_stall_timeout, int, 0644);
#define RCU_TASK_STALL_INFO (HZ * 10)
static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO;
module_param(rcu_task_stall_info, int, 0644);
static int rcu_task_stall_info_mult __read_mostly = 3;
module_param(rcu_task_stall_info_mult, int, 0444);
static int rcu_task_enqueue_lim __read_mostly = -1; static int rcu_task_enqueue_lim __read_mostly = -1;
module_param(rcu_task_enqueue_lim, int, 0444); module_param(rcu_task_enqueue_lim, int, 0444);
@ -548,8 +553,15 @@ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
static void __init rcu_tasks_bootup_oddness(void) static void __init rcu_tasks_bootup_oddness(void)
{ {
#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
int rtsimc;
if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT) if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout); pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
rtsimc = clamp(rcu_task_stall_info_mult, 1, 10);
if (rtsimc != rcu_task_stall_info_mult) {
pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc);
rcu_task_stall_info_mult = rtsimc;
}
#endif /* #ifdef CONFIG_TASKS_RCU */ #endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RCU #ifdef CONFIG_TASKS_RCU
pr_info("\tTrampoline variant of Tasks RCU enabled.\n"); pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
@ -592,10 +604,15 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t);
/* Wait for one RCU-tasks grace period. */ /* Wait for one RCU-tasks grace period. */
static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
{ {
struct task_struct *g, *t; struct task_struct *g;
unsigned long lastreport;
LIST_HEAD(holdouts);
int fract; int fract;
LIST_HEAD(holdouts);
unsigned long j;
unsigned long lastinfo;
unsigned long lastreport;
bool reported = false;
int rtsi;
struct task_struct *t;
set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP); set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
rtp->pregp_func(); rtp->pregp_func();
@ -621,6 +638,8 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
* is empty, we are done. * is empty, we are done.
*/ */
lastreport = jiffies; lastreport = jiffies;
lastinfo = lastreport;
rtsi = READ_ONCE(rcu_task_stall_info);
// Start off with initial wait and slowly back off to 1 HZ wait. // Start off with initial wait and slowly back off to 1 HZ wait.
fract = rtp->init_fract; fract = rtp->init_fract;
@ -630,7 +649,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
bool needreport; bool needreport;
int rtst; int rtst;
/* Slowly back off waiting for holdouts */ // Slowly back off waiting for holdouts
set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS); set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
schedule_timeout_idle(fract); schedule_timeout_idle(fract);
@ -639,12 +658,23 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
rtst = READ_ONCE(rcu_task_stall_timeout); rtst = READ_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 && time_after(jiffies, lastreport + rtst); needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
if (needreport) if (needreport) {
lastreport = jiffies; lastreport = jiffies;
reported = true;
}
firstreport = true; firstreport = true;
WARN_ON(signal_pending(current)); WARN_ON(signal_pending(current));
set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS); set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
rtp->holdouts_func(&holdouts, needreport, &firstreport); rtp->holdouts_func(&holdouts, needreport, &firstreport);
// Print pre-stall informational messages if needed.
j = jiffies;
if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
lastinfo = j;
rtsi = rtsi * rcu_task_stall_info_mult;
pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
__func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
}
} }
set_tasks_gp_state(rtp, RTGS_POST_GP); set_tasks_gp_state(rtp, RTGS_POST_GP);