2009-01-15 11:08:40 -08:00
/*
* Detect Hung Task
*
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
*
*/
# include <linux/mm.h>
# include <linux/cpu.h>
# include <linux/nmi.h>
# include <linux/init.h>
# include <linux/delay.h>
# include <linux/freezer.h>
# include <linux/kthread.h>
# include <linux/lockdep.h>
2011-05-23 14:51:41 -04:00
# include <linux/export.h>
2009-01-15 11:08:40 -08:00
# include <linux/sysctl.h>
2013-08-01 18:59:41 +02:00
# include <linux/utsname.h>
2017-02-08 18:51:30 +01:00
# include <linux/sched/signal.h>
2017-02-08 18:51:35 +01:00
# include <linux/sched/debug.h>
2017-02-08 18:51:30 +01:00
2013-10-19 18:18:28 +02:00
# include <trace/events/sched.h>
2009-01-15 11:08:40 -08:00
/*
2009-02-04 20:35:48 -08:00
* The number of tasks checked :
2009-01-15 11:08:40 -08:00
*/
2013-09-23 16:43:58 +08:00
int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT ;
2009-02-04 20:35:48 -08:00
/*
* Limit number of tasks checked in a batch .
*
* This value controls the preemptibility of khungtaskd since preemption
* is disabled during the critical section . It also controls the size of
* the RCU grace period . So it needs to be upper - bound .
*/
# define HUNG_TASK_BATCHING 1024
2009-01-15 11:08:40 -08:00
/*
* Zero means infinite timeout - no checking done :
*/
2011-04-27 14:27:24 -04:00
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT ;
2009-01-15 11:08:40 -08:00
2014-01-20 17:34:13 +00:00
int __read_mostly sysctl_hung_task_warnings = 10 ;
2009-01-15 11:08:40 -08:00
static int __read_mostly did_panic ;
static struct task_struct * watchdog_task ;
/*
* Should we panic ( and reboot , if panic_timeout = is set ) when a
* hung task is detected :
*/
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE ;
/*
 * Boot-parameter handler: parse @str as an unsigned integer and store it
 * in sysctl_hung_task_panic.
 *
 * Returns the kstrtouint() error code when @str cannot be parsed,
 * otherwise 1.
 */
static int __init hung_task_panic_setup(char *str)
{
	int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);

	if (rc)
		return rc;

	return 1;
}
/*
 * The key is compared against whitespace-delimited command-line words, so
 * it must not contain blanks itself or the option can never match.
 */
__setup("hung_task_panic=", hung_task_panic_setup);
/*
 * Panic notifier callback: record that a panic happened so later scans
 * are skipped. None of the arguments are used.
 */
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

/* Notifier block that routes panic notifications to hung_task_panic(). */
static struct notifier_block panic_block = {
	.notifier_call	= hung_task_panic,
};
2009-02-06 15:37:47 -08:00
static void check_hung_task ( struct task_struct * t , unsigned long timeout )
2009-01-15 11:08:40 -08:00
{
unsigned long switch_count = t - > nvcsw + t - > nivcsw ;
2009-02-10 16:52:37 +01:00
/*
* Ensure the task is not frozen .
2012-01-03 14:41:13 -08:00
* Also , skip vfork and any other user process that freezer should skip .
2009-02-10 16:52:37 +01:00
*/
2012-01-03 14:41:13 -08:00
if ( unlikely ( t - > flags & ( PF_FROZEN | PF_FREEZER_SKIP ) ) )
return ;
/*
* When a freshly created task is scheduled once , changes its state to
* TASK_UNINTERRUPTIBLE without having ever been switched out once , it
* musn ' t be checked .
*/
if ( unlikely ( ! switch_count ) )
2009-01-15 11:08:40 -08:00
return ;
2009-02-06 15:37:47 -08:00
if ( switch_count ! = t - > last_switch_count ) {
2009-01-15 11:08:40 -08:00
t - > last_switch_count = switch_count ;
return ;
}
2013-10-19 18:18:28 +02:00
trace_sched_process_hang ( t ) ;
2016-10-11 13:55:56 -07:00
if ( ! sysctl_hung_task_warnings & & ! sysctl_hung_task_panic )
2009-01-15 11:08:40 -08:00
return ;
2014-01-20 17:34:13 +00:00
2009-01-15 11:08:40 -08:00
/*
* Ok , the task did not get scheduled for more than 2 minutes ,
* complain :
*/
2016-10-11 13:55:56 -07:00
if ( sysctl_hung_task_warnings ) {
2016-12-12 16:45:35 -08:00
if ( sysctl_hung_task_warnings > 0 )
sysctl_hung_task_warnings - - ;
2016-10-11 13:55:56 -07:00
pr_err ( " INFO: task %s:%d blocked for more than %ld seconds. \n " ,
t - > comm , t - > pid , timeout ) ;
pr_err ( " %s %s %.*s \n " ,
print_tainted ( ) , init_utsname ( ) - > release ,
( int ) strcspn ( init_utsname ( ) - > version , " " ) ,
init_utsname ( ) - > version ) ;
pr_err ( " \" echo 0 > /proc/sys/kernel/hung_task_timeout_secs \" "
" disables this message. \n " ) ;
sched_show_task ( t ) ;
debug_show_all_locks ( ) ;
}
2009-01-15 11:08:40 -08:00
touch_nmi_watchdog ( ) ;
2012-03-15 17:47:20 -04:00
if ( sysctl_hung_task_panic ) {
trigger_all_cpu_backtrace ( ) ;
2009-01-15 11:08:40 -08:00
panic ( " hung_task: blocked tasks " ) ;
2012-03-15 17:47:20 -04:00
}
2009-01-15 11:08:40 -08:00
}
2009-02-04 20:35:48 -08:00
/*
* To avoid extending the RCU grace period for an unbounded amount of time ,
* periodically exit the critical section and enter a new one .
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
2010-08-05 17:10:54 +02:00
* to exit the grace period . For classic RCU , a reschedule is required .
2009-02-04 20:35:48 -08:00
*/
2012-03-05 14:59:14 -08:00
static bool rcu_lock_break ( struct task_struct * g , struct task_struct * t )
2009-02-04 20:35:48 -08:00
{
2012-03-05 14:59:14 -08:00
bool can_cont ;
2009-02-04 20:35:48 -08:00
get_task_struct ( g ) ;
get_task_struct ( t ) ;
rcu_read_unlock ( ) ;
cond_resched ( ) ;
rcu_read_lock ( ) ;
2012-03-05 14:59:14 -08:00
can_cont = pid_alive ( g ) & & pid_alive ( t ) ;
2009-02-04 20:35:48 -08:00
put_task_struct ( t ) ;
put_task_struct ( g ) ;
2012-03-05 14:59:14 -08:00
return can_cont ;
2009-02-04 20:35:48 -08:00
}
2009-01-15 11:08:40 -08:00
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time ( 120 seconds ) . If that happens , print out
* a warning .
*/
2009-01-17 10:31:48 -08:00
static void check_hung_uninterruptible_tasks ( unsigned long timeout )
2009-01-15 11:08:40 -08:00
{
int max_count = sysctl_hung_task_check_count ;
2009-02-04 20:35:48 -08:00
int batch_count = HUNG_TASK_BATCHING ;
2009-01-15 11:08:40 -08:00
struct task_struct * g , * t ;
/*
* If the system crashed already then all bets are off ,
* do not report extra hung tasks :
*/
if ( test_taint ( TAINT_DIE ) | | did_panic )
return ;
2009-02-05 09:56:08 -08:00
rcu_read_lock ( ) ;
2015-04-15 16:16:47 -07:00
for_each_process_thread ( g , t ) {
2009-11-27 13:28:20 +11:00
if ( ! max_count - - )
2009-01-15 11:08:40 -08:00
goto unlock ;
2009-02-04 20:35:48 -08:00
if ( ! - - batch_count ) {
batch_count = HUNG_TASK_BATCHING ;
2012-03-05 14:59:14 -08:00
if ( ! rcu_lock_break ( g , t ) )
2009-02-04 20:35:48 -08:00
goto unlock ;
}
2009-01-15 11:08:40 -08:00
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if ( t - > state = = TASK_UNINTERRUPTIBLE )
2009-02-06 15:37:47 -08:00
check_hung_task ( t , timeout ) ;
2015-04-15 16:16:47 -07:00
}
2009-01-15 11:08:40 -08:00
unlock :
2009-02-05 09:56:08 -08:00
rcu_read_unlock ( ) ;
2009-01-15 11:08:40 -08:00
}
2016-03-22 14:24:39 -07:00
static long hung_timeout_jiffies ( unsigned long last_checked ,
unsigned long timeout )
2009-01-15 11:08:40 -08:00
{
/* timeout of 0 will disable the watchdog */
2016-03-22 14:24:39 -07:00
return timeout ? last_checked - jiffies + timeout * HZ :
MAX_SCHEDULE_TIMEOUT ;
2009-01-15 11:08:40 -08:00
}
/*
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs ( struct ctl_table * table , int write ,
2009-09-23 15:57:19 -07:00
void __user * buffer ,
2009-01-15 11:08:40 -08:00
size_t * lenp , loff_t * ppos )
{
int ret ;
2009-09-23 15:57:19 -07:00
ret = proc_doulongvec_minmax ( table , write , buffer , lenp , ppos ) ;
2009-01-15 11:08:40 -08:00
if ( ret | | ! write )
goto out ;
wake_up_process ( watchdog_task ) ;
out :
return ret ;
}
2013-10-11 21:39:26 -03:00
/* Non-zero requests that the watchdog skip its next scan. */
static atomic_t reset_hung_task = ATOMIC_INIT(0);

/*
 * Ask the detector to skip one scan: the watchdog thread consumes the
 * flag with atomic_xchg() and does not check tasks on that pass.
 */
void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);
2009-01-15 11:08:40 -08:00
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog ( void * dummy )
{
2016-03-22 14:24:39 -07:00
unsigned long hung_last_checked = jiffies ;
2009-01-15 11:08:40 -08:00
set_user_nice ( current , 0 ) ;
for ( ; ; ) {
2009-02-06 15:37:47 -08:00
unsigned long timeout = sysctl_hung_task_timeout_secs ;
2016-03-22 14:24:39 -07:00
long t = hung_timeout_jiffies ( hung_last_checked , timeout ) ;
2009-01-17 10:31:48 -08:00
2016-03-22 14:24:39 -07:00
if ( t < = 0 ) {
if ( ! atomic_xchg ( & reset_hung_task , 0 ) )
check_hung_uninterruptible_tasks ( timeout ) ;
hung_last_checked = jiffies ;
2013-10-11 21:39:26 -03:00
continue ;
2016-03-22 14:24:39 -07:00
}
schedule_timeout_interruptible ( t ) ;
2009-01-15 11:08:40 -08:00
}
return 0 ;
}
/*
 * Register the panic notifier and start the khungtaskd watchdog thread.
 *
 * Returns 0 on success. If the thread cannot be started, the notifier is
 * unregistered again, the failure is logged and the kthread_run() error
 * code is returned; watchdog_task is left unset in that case.
 */
static int __init hung_task_init(void)
{
	struct task_struct *task;

	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	task = kthread_run(watchdog, NULL, " khungtaskd ");
	if (IS_ERR(task)) {
		/* kthread_run() reports failure through an ERR_PTR value. */
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &panic_block);
		pr_err("hung_task: failed to start watchdog thread (%ld)\n",
		       PTR_ERR(task));
		return PTR_ERR(task);
	}
	watchdog_task = task;

	return 0;
}
subsys_initcall(hung_task_init);