2009-01-15 22:08:40 +03:00
/*
* Detect Hung Task
*
* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
*
*/
# include <linux/mm.h>
# include <linux/cpu.h>
# include <linux/nmi.h>
# include <linux/init.h>
# include <linux/delay.h>
# include <linux/freezer.h>
# include <linux/kthread.h>
# include <linux/lockdep.h>
2011-05-23 22:51:41 +04:00
# include <linux/export.h>
2009-01-15 22:08:40 +03:00
# include <linux/sysctl.h>
2013-08-01 20:59:41 +04:00
# include <linux/utsname.h>
2017-02-08 20:51:30 +03:00
# include <linux/sched/signal.h>
2017-02-08 20:51:35 +03:00
# include <linux/sched/debug.h>
2017-02-08 20:51:30 +03:00
2013-10-19 20:18:28 +04:00
# include <trace/events/sched.h>
2009-01-15 22:08:40 +03:00
/*
2009-02-05 07:35:48 +03:00
* The maximum number of tasks checked in one scan:
2009-01-15 22:08:40 +03:00
*/
2013-09-23 12:43:58 +04:00
int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT ;
2009-02-05 07:35:48 +03:00
/*
* Limit the number of tasks checked in a batch.
*
* This value controls the preemptibility of khungtaskd, since preemption
* is disabled during the critical section. It also controls the size of
* the RCU grace period, so it needs to be upper-bounded.
*/
# define HUNG_TASK_BATCHING 1024
2009-01-15 22:08:40 +03:00
/*
* Zero means infinite timeout - no checking is done:
*/
2011-04-27 22:27:24 +04:00
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT ;
2009-01-15 22:08:40 +03:00
2014-01-20 21:34:13 +04:00
/*
 * Remaining number of hung-task reports that may still be printed.
 * check_hung_task() decrements it per report while it is positive, prints
 * nothing once it reaches zero, and never decrements a negative value.
 */
int __read_mostly sysctl_hung_task_warnings = 10 ;
2009-01-15 22:08:40 +03:00
/* Set to 1 by the panic notifier; once set, no further scans are done. */
static int __read_mostly did_panic ;
2017-05-09 01:55:11 +03:00
/*
 * Cleared at the start of every scan and set when a hung task is reported;
 * when set, debug_show_all_locks() is called.
 */
static bool hung_task_show_lock ;
2009-01-15 22:08:40 +03:00
/* The khungtaskd thread, as returned by kthread_run() in hung_task_init(). */
static struct task_struct * watchdog_task ;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* hung task is detected:
*/
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE ;
/*
 * Boot-parameter handler: parse the value given on the kernel command line
 * into sysctl_hung_task_panic.
 *
 * Returns the kstrtouint() error code if the value cannot be parsed,
 * otherwise 1.
 */
static int __init hung_task_panic_setup(char *str)
{
	int err;

	err = kstrtouint(str, 0, &sysctl_hung_task_panic);

	return err ? err : 1;
}
__setup(" hung_task_panic= ", hung_task_panic_setup);
/*
 * Panic notifier callback: record that a panic has happened so that
 * check_hung_uninterruptible_tasks() stops reporting hung tasks.
 * The arguments are unused; always returns NOTIFY_DONE.
 */
static int hung_task_panic(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}
/*
 * Notifier block that routes panic notifications to hung_task_panic();
 * registered on panic_notifier_list by hung_task_init().
 */
static struct notifier_block panic_block = {
. notifier_call = hung_task_panic ,
} ;
2009-02-07 02:37:47 +03:00
static void check_hung_task ( struct task_struct * t , unsigned long timeout )
2009-01-15 22:08:40 +03:00
{
unsigned long switch_count = t - > nvcsw + t - > nivcsw ;
2009-02-10 18:52:37 +03:00
/*
* Ensure the task is not frozen .
2012-01-04 02:41:13 +04:00
* Also , skip vfork and any other user process that freezer should skip .
2009-02-10 18:52:37 +03:00
*/
2012-01-04 02:41:13 +04:00
if ( unlikely ( t - > flags & ( PF_FROZEN | PF_FREEZER_SKIP ) ) )
return ;
/*
* When a freshly created task is scheduled once , changes its state to
* TASK_UNINTERRUPTIBLE without having ever been switched out once , it
* musn ' t be checked .
*/
if ( unlikely ( ! switch_count ) )
2009-01-15 22:08:40 +03:00
return ;
2009-02-07 02:37:47 +03:00
if ( switch_count ! = t - > last_switch_count ) {
2009-01-15 22:08:40 +03:00
t - > last_switch_count = switch_count ;
return ;
}
2013-10-19 20:18:28 +04:00
trace_sched_process_hang ( t ) ;
2016-10-11 23:55:56 +03:00
if ( ! sysctl_hung_task_warnings & & ! sysctl_hung_task_panic )
2009-01-15 22:08:40 +03:00
return ;
2014-01-20 21:34:13 +04:00
2009-01-15 22:08:40 +03:00
/*
* Ok , the task did not get scheduled for more than 2 minutes ,
* complain :
*/
2016-10-11 23:55:56 +03:00
if ( sysctl_hung_task_warnings ) {
2016-12-13 03:45:35 +03:00
if ( sysctl_hung_task_warnings > 0 )
sysctl_hung_task_warnings - - ;
2016-10-11 23:55:56 +03:00
pr_err ( " INFO: task %s:%d blocked for more than %ld seconds. \n " ,
t - > comm , t - > pid , timeout ) ;
pr_err ( " %s %s %.*s \n " ,
print_tainted ( ) , init_utsname ( ) - > release ,
( int ) strcspn ( init_utsname ( ) - > version , " " ) ,
init_utsname ( ) - > version ) ;
pr_err ( " \" echo 0 > /proc/sys/kernel/hung_task_timeout_secs \" "
" disables this message. \n " ) ;
sched_show_task ( t ) ;
2017-05-09 01:55:11 +03:00
hung_task_show_lock = true ;
2016-10-11 23:55:56 +03:00
}
2009-01-15 22:08:40 +03:00
touch_nmi_watchdog ( ) ;
2012-03-16 01:47:20 +04:00
if ( sysctl_hung_task_panic ) {
2017-05-09 01:55:11 +03:00
if ( hung_task_show_lock )
debug_show_all_locks ( ) ;
2012-03-16 01:47:20 +04:00
trigger_all_cpu_backtrace ( ) ;
2009-01-15 22:08:40 +03:00
panic ( " hung_task: blocked tasks " ) ;
2012-03-16 01:47:20 +04:00
}
2009-01-15 22:08:40 +03:00
}
2009-02-05 07:35:48 +03:00
/*
* To avoid extending the RCU grace period for an unbounded amount of time ,
* periodically exit the critical section and enter a new one .
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
2010-08-05 19:10:54 +04:00
* to exit the grace period . For classic RCU , a reschedule is required .
2009-02-05 07:35:48 +03:00
*/
2012-03-06 02:59:14 +04:00
static bool rcu_lock_break ( struct task_struct * g , struct task_struct * t )
2009-02-05 07:35:48 +03:00
{
2012-03-06 02:59:14 +04:00
bool can_cont ;
2009-02-05 07:35:48 +03:00
get_task_struct ( g ) ;
get_task_struct ( t ) ;
rcu_read_unlock ( ) ;
cond_resched ( ) ;
rcu_read_lock ( ) ;
2012-03-06 02:59:14 +04:00
can_cont = pid_alive ( g ) & & pid_alive ( t ) ;
2009-02-05 07:35:48 +03:00
put_task_struct ( t ) ;
put_task_struct ( g ) ;
2012-03-06 02:59:14 +04:00
return can_cont ;
2009-02-05 07:35:48 +03:00
}
2009-01-15 22:08:40 +03:00
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time ( 120 seconds ) . If that happens , print out
* a warning .
*/
2009-01-17 21:31:48 +03:00
static void check_hung_uninterruptible_tasks ( unsigned long timeout )
2009-01-15 22:08:40 +03:00
{
int max_count = sysctl_hung_task_check_count ;
2009-02-05 07:35:48 +03:00
int batch_count = HUNG_TASK_BATCHING ;
2009-01-15 22:08:40 +03:00
struct task_struct * g , * t ;
/*
* If the system crashed already then all bets are off ,
* do not report extra hung tasks :
*/
if ( test_taint ( TAINT_DIE ) | | did_panic )
return ;
2017-05-09 01:55:11 +03:00
hung_task_show_lock = false ;
2009-02-05 20:56:08 +03:00
rcu_read_lock ( ) ;
2015-04-16 02:16:47 +03:00
for_each_process_thread ( g , t ) {
2009-11-27 05:28:20 +03:00
if ( ! max_count - - )
2009-01-15 22:08:40 +03:00
goto unlock ;
2009-02-05 07:35:48 +03:00
if ( ! - - batch_count ) {
batch_count = HUNG_TASK_BATCHING ;
2012-03-06 02:59:14 +04:00
if ( ! rcu_lock_break ( g , t ) )
2009-02-05 07:35:48 +03:00
goto unlock ;
}
2009-01-15 22:08:40 +03:00
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if ( t - > state = = TASK_UNINTERRUPTIBLE )
2009-02-07 02:37:47 +03:00
check_hung_task ( t , timeout ) ;
2015-04-16 02:16:47 +03:00
}
2009-01-15 22:08:40 +03:00
unlock :
2009-02-05 20:56:08 +03:00
rcu_read_unlock ( ) ;
2017-05-09 01:55:11 +03:00
if ( hung_task_show_lock )
debug_show_all_locks ( ) ;
2009-01-15 22:08:40 +03:00
}
2016-03-23 00:24:39 +03:00
static long hung_timeout_jiffies ( unsigned long last_checked ,
unsigned long timeout )
2009-01-15 22:08:40 +03:00
{
/* timeout of 0 will disable the watchdog */
2016-03-23 00:24:39 +03:00
return timeout ? last_checked - jiffies + timeout * HZ :
MAX_SCHEDULE_TIMEOUT ;
2009-01-15 22:08:40 +03:00
}
/*
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs ( struct ctl_table * table , int write ,
2009-09-24 02:57:19 +04:00
void __user * buffer ,
2009-01-15 22:08:40 +03:00
size_t * lenp , loff_t * ppos )
{
int ret ;
2009-09-24 02:57:19 +04:00
ret = proc_doulongvec_minmax ( table , write , buffer , lenp , ppos ) ;
2009-01-15 22:08:40 +03:00
if ( ret | | ! write )
goto out ;
wake_up_process ( watchdog_task ) ;
out :
return ret ;
}
2013-10-12 04:39:26 +04:00
/* Set by reset_hung_task_detector(); read and cleared by the watchdog. */
static atomic_t reset_hung_task = ATOMIC_INIT(0);

/*
 * reset_hung_task_detector - make the watchdog skip its next scan
 *
 * Sets a flag that the watchdog thread consumes with atomic_xchg(): when
 * the flag is found set, that scan is skipped and only the "last checked"
 * time is refreshed.
 */
void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);
2009-01-15 22:08:40 +03:00
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog ( void * dummy )
{
2016-03-23 00:24:39 +03:00
unsigned long hung_last_checked = jiffies ;
2009-01-15 22:08:40 +03:00
set_user_nice ( current , 0 ) ;
for ( ; ; ) {
2009-02-07 02:37:47 +03:00
unsigned long timeout = sysctl_hung_task_timeout_secs ;
2016-03-23 00:24:39 +03:00
long t = hung_timeout_jiffies ( hung_last_checked , timeout ) ;
2009-01-17 21:31:48 +03:00
2016-03-23 00:24:39 +03:00
if ( t < = 0 ) {
if ( ! atomic_xchg ( & reset_hung_task , 0 ) )
check_hung_uninterruptible_tasks ( timeout ) ;
hung_last_checked = jiffies ;
2013-10-12 04:39:26 +04:00
continue ;
2016-03-23 00:24:39 +03:00
}
schedule_timeout_interruptible ( t ) ;
2009-01-15 22:08:40 +03:00
}
return 0 ;
}
/*
 * hung_task_init - register the panic notifier and start the watchdog thread
 *
 * Returns 0 on success.  If the watchdog thread cannot be created, the
 * panic notifier is unregistered again, watchdog_task is left untouched
 * and the error from kthread_run() is returned.
 */
static int __init hung_task_init(void)
{
	struct task_struct *tsk;

	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	tsk = kthread_run(watchdog, NULL, " khungtaskd ");
	if (IS_ERR(tsk)) {
		/* Never publish an error pointer as the watchdog task. */
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &panic_block);
		return PTR_ERR(tsk);
	}

	watchdog_task = tsk;

	return 0;
}
subsys_initcall(hung_task_init);