2019-05-19 15:08:55 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2009-01-15 22:08:40 +03:00
/*
* Detect Hung Task
*
* kernel / hung_task . c - kernel thread for detecting tasks stuck in D state
*
*/
# include <linux/mm.h>
# include <linux/cpu.h>
# include <linux/nmi.h>
# include <linux/init.h>
# include <linux/delay.h>
# include <linux/freezer.h>
# include <linux/kthread.h>
# include <linux/lockdep.h>
2011-05-23 22:51:41 +04:00
# include <linux/export.h>
2021-07-01 04:54:59 +03:00
# include <linux/panic_notifier.h>
2009-01-15 22:08:40 +03:00
# include <linux/sysctl.h>
2018-10-17 14:23:55 +03:00
# include <linux/suspend.h>
2013-08-01 20:59:41 +04:00
# include <linux/utsname.h>
2017-02-08 20:51:30 +03:00
# include <linux/sched/signal.h>
2017-02-08 20:51:35 +03:00
# include <linux/sched/debug.h>
2019-03-08 03:26:46 +03:00
# include <linux/sched/sysctl.h>
2017-02-08 20:51:30 +03:00
2013-10-19 20:18:28 +04:00
# include <trace/events/sched.h>
2009-01-15 22:08:40 +03:00
/*
2009-02-05 07:35:48 +03:00
* The number of tasks checked :
2009-01-15 22:08:40 +03:00
*/
2013-09-23 12:43:58 +04:00
int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT ;
2009-02-05 07:35:48 +03:00
/*
* Limit number of tasks checked in a batch .
*
* This value controls the preemptibility of khungtaskd since preemption
* is disabled during the critical section . It also controls the size of
* the RCU grace period . So it needs to be upper - bound .
*/
2019-01-04 02:26:31 +03:00
# define HUNG_TASK_LOCK_BREAK (HZ / 10)
2009-01-15 22:08:40 +03:00
/*
* Zero means infinite timeout - no checking done :
*/
2011-04-27 22:27:24 +04:00
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT ;
2009-01-15 22:08:40 +03:00
2018-08-22 07:55:52 +03:00
/*
* Zero ( default value ) means use sysctl_hung_task_timeout_secs :
*/
unsigned long __read_mostly sysctl_hung_task_check_interval_secs ;
2014-01-20 21:34:13 +04:00
int __read_mostly sysctl_hung_task_warnings = 10 ;
2009-01-15 22:08:40 +03:00
static int __read_mostly did_panic ;
2017-05-09 01:55:11 +03:00
static bool hung_task_show_lock ;
kernel/hung_task.c: show all hung tasks before panic
When we get a hung task it can often be valuable to see _all_ the hung
tasks on the system before calling panic().
Quoting from https://syzkaller.appspot.com/text?tag=CrashReport&id=5316056503549952
----------------------------------------
INFO: task syz-executor0:6540 blocked for more than 120 seconds.
Not tainted 4.16.0+ #13
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor0 D23560 6540 4521 0x80000004
Call Trace:
context_switch kernel/sched/core.c:2848 [inline]
__schedule+0x8fb/0x1ef0 kernel/sched/core.c:3490
schedule+0xf5/0x430 kernel/sched/core.c:3549
schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3607
__mutex_lock_common kernel/locking/mutex.c:833 [inline]
__mutex_lock+0xb7f/0x1810 kernel/locking/mutex.c:893
mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355
__blkdev_driver_ioctl block/ioctl.c:303 [inline]
blkdev_ioctl+0x1759/0x1e00 block/ioctl.c:601
ioctl_by_bdev+0xa5/0x110 fs/block_dev.c:2060
isofs_get_last_session fs/isofs/inode.c:567 [inline]
isofs_fill_super+0x2ba9/0x3bc0 fs/isofs/inode.c:660
mount_bdev+0x2b7/0x370 fs/super.c:1119
isofs_mount+0x34/0x40 fs/isofs/inode.c:1560
mount_fs+0x66/0x2d0 fs/super.c:1222
vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
vfs_kern_mount fs/namespace.c:2514 [inline]
do_new_mount fs/namespace.c:2517 [inline]
do_mount+0xea4/0x2b90 fs/namespace.c:2847
ksys_mount+0xab/0x120 fs/namespace.c:3063
SYSC_mount fs/namespace.c:3077 [inline]
SyS_mount+0x39/0x50 fs/namespace.c:3074
do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
entry_SYSCALL_64_after_hwframe+0x42/0xb7
(...snipped...)
Showing all locks held in the system:
(...snipped...)
2 locks held by syz-executor0/6540:
#0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: alloc_super fs/super.c:211 [inline]
#0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: sget_userns+0x3b2/0xe60 fs/super.c:502 /* down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); */
#1: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */
(...snipped...)
3 locks held by syz-executor7/6541:
#0: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */
#1: 000000007bf3d3f9 (&bdev->bd_mutex){+.+.}, at: blkdev_reread_part+0x1e/0x40 block/ioctl.c:192
#2: 00000000566d4c39 (&type->s_umount_key#50){.+.+}, at: __get_super.part.10+0x1d3/0x280 fs/super.c:663 /* down_read(&sb->s_umount); */
----------------------------------------
When reporting an AB-BA deadlock like shown above, it would be nice if
trace of PID=6541 is printed as well as trace of PID=6540 before calling
panic().
Showing hung tasks up to /proc/sys/kernel/hung_task_warnings could delay
calling panic() but normally there should not be so many hung tasks.
Link: http://lkml.kernel.org/r/201804050705.BHE57833.HVFOFtSOMQJFOL@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-06-08 03:10:34 +03:00
static bool hung_task_call_panic ;
kernel/hung_task.c: introduce sysctl to print all traces when a hung task is detected
Commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before
panic") introduced a change in that we started to show all CPUs
backtraces when a hung task is detected _and_ the sysctl/kernel
parameter "hung_task_panic" is set. The idea is good, because usually
when observing deadlocks (that may lead to hung tasks), the culprit is
another task holding a lock and not necessarily the task detected as
hung.
The problem with this approach is that dumping backtraces is a slightly
expensive task, specially printing that on console (and specially in
many CPU machines, as servers commonly found nowadays). So, users that
plan to collect a kdump to investigate the hung tasks and narrow down
the deadlock definitely don't need the CPUs backtrace on dmesg/console,
which will delay the panic and pollute the log (crash tool would easily
grab all CPUs traces with 'bt -a' command).
Also, there's the reciprocal scenario: some users may be interested in
seeing the CPUs backtraces but not have the system panic when a hung
task is detected. The current approach hence is almost as embedding a
policy in the kernel, by forcing the CPUs backtraces' dump (only) on
hung_task_panic.
This patch decouples the panic event on hung task from the CPUs
backtraces dump, by creating (and documenting) a new sysctl called
"hung_task_all_cpu_backtrace", analog to the approach taken on soft/hard
lockups, that have both a panic and an "all_cpu_backtrace" sysctl to
allow individual control. The new mechanism for dumping the CPUs
backtraces on hung task detection respects "hung_task_warnings" by not
dumping the traces in case there's no warnings left.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: http://lkml.kernel.org/r/20200327223646.20779-1-gpiccoli@canonical.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-08 07:40:45 +03:00
static bool hung_task_show_all_bt ;
2009-01-15 22:08:40 +03:00
static struct task_struct * watchdog_task ;
kernel/hung_task.c: introduce sysctl to print all traces when a hung task is detected
Commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before
panic") introduced a change in that we started to show all CPUs
backtraces when a hung task is detected _and_ the sysctl/kernel
parameter "hung_task_panic" is set. The idea is good, because usually
when observing deadlocks (that may lead to hung tasks), the culprit is
another task holding a lock and not necessarily the task detected as
hung.
The problem with this approach is that dumping backtraces is a slightly
expensive task, specially printing that on console (and specially in
many CPU machines, as servers commonly found nowadays). So, users that
plan to collect a kdump to investigate the hung tasks and narrow down
the deadlock definitely don't need the CPUs backtrace on dmesg/console,
which will delay the panic and pollute the log (crash tool would easily
grab all CPUs traces with 'bt -a' command).
Also, there's the reciprocal scenario: some users may be interested in
seeing the CPUs backtraces but not have the system panic when a hung
task is detected. The current approach hence is almost as embedding a
policy in the kernel, by forcing the CPUs backtraces' dump (only) on
hung_task_panic.
This patch decouples the panic event on hung task from the CPUs
backtraces dump, by creating (and documenting) a new sysctl called
"hung_task_all_cpu_backtrace", analog to the approach taken on soft/hard
lockups, that have both a panic and an "all_cpu_backtrace" sysctl to
allow individual control. The new mechanism for dumping the CPUs
backtraces on hung task detection respects "hung_task_warnings" by not
dumping the traces in case there's no warnings left.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: http://lkml.kernel.org/r/20200327223646.20779-1-gpiccoli@canonical.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-08 07:40:45 +03:00
# ifdef CONFIG_SMP
/*
* Should we dump all CPUs backtraces in a hung task event ?
* Defaults to 0 , can be changed via sysctl .
*/
2022-01-22 09:11:00 +03:00
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace ;
# else
# define sysctl_hung_task_all_cpu_backtrace 0
kernel/hung_task.c: introduce sysctl to print all traces when a hung task is detected
Commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before
panic") introduced a change in that we started to show all CPUs
backtraces when a hung task is detected _and_ the sysctl/kernel
parameter "hung_task_panic" is set. The idea is good, because usually
when observing deadlocks (that may lead to hung tasks), the culprit is
another task holding a lock and not necessarily the task detected as
hung.
The problem with this approach is that dumping backtraces is a slightly
expensive task, specially printing that on console (and specially in
many CPU machines, as servers commonly found nowadays). So, users that
plan to collect a kdump to investigate the hung tasks and narrow down
the deadlock definitely don't need the CPUs backtrace on dmesg/console,
which will delay the panic and pollute the log (crash tool would easily
grab all CPUs traces with 'bt -a' command).
Also, there's the reciprocal scenario: some users may be interested in
seeing the CPUs backtraces but not have the system panic when a hung
task is detected. The current approach hence is almost as embedding a
policy in the kernel, by forcing the CPUs backtraces' dump (only) on
hung_task_panic.
This patch decouples the panic event on hung task from the CPUs
backtraces dump, by creating (and documenting) a new sysctl called
"hung_task_all_cpu_backtrace", analog to the approach taken on soft/hard
lockups, that have both a panic and an "all_cpu_backtrace" sysctl to
allow individual control. The new mechanism for dumping the CPUs
backtraces on hung task detection respects "hung_task_warnings" by not
dumping the traces in case there's no warnings left.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: http://lkml.kernel.org/r/20200327223646.20779-1-gpiccoli@canonical.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-08 07:40:45 +03:00
# endif /* CONFIG_SMP */
2009-01-15 22:08:40 +03:00
/*
* Should we panic ( and reboot , if panic_timeout = is set ) when a
* hung task is detected :
*/
unsigned int __read_mostly sysctl_hung_task_panic =
2022-04-30 00:38:00 +03:00
IS_ENABLED ( CONFIG_BOOTPARAM_HUNG_TASK_PANIC ) ;
2009-01-15 22:08:40 +03:00
static int
hung_task_panic ( struct notifier_block * this , unsigned long event , void * ptr )
{
did_panic = 1 ;
return NOTIFY_DONE ;
}
static struct notifier_block panic_block = {
. notifier_call = hung_task_panic ,
} ;
2009-02-07 02:37:47 +03:00
static void check_hung_task ( struct task_struct * t , unsigned long timeout )
2009-01-15 22:08:40 +03:00
{
unsigned long switch_count = t - > nvcsw + t - > nivcsw ;
2009-02-10 18:52:37 +03:00
/*
* Ensure the task is not frozen .
2012-01-04 02:41:13 +04:00
* Also , skip vfork and any other user process that freezer should skip .
2009-02-10 18:52:37 +03:00
*/
2012-01-04 02:41:13 +04:00
if ( unlikely ( t - > flags & ( PF_FROZEN | PF_FREEZER_SKIP ) ) )
return ;
/*
* When a freshly created task is scheduled once , changes its state to
* TASK_UNINTERRUPTIBLE without having ever been switched out once , it
* musn ' t be checked .
*/
if ( unlikely ( ! switch_count ) )
2009-01-15 22:08:40 +03:00
return ;
2009-02-07 02:37:47 +03:00
if ( switch_count ! = t - > last_switch_count ) {
2009-01-15 22:08:40 +03:00
t - > last_switch_count = switch_count ;
2018-08-22 07:55:52 +03:00
t - > last_switch_time = jiffies ;
2009-01-15 22:08:40 +03:00
return ;
}
2018-08-22 07:55:52 +03:00
if ( time_is_after_jiffies ( t - > last_switch_time + timeout * HZ ) )
return ;
2013-10-19 20:18:28 +04:00
trace_sched_process_hang ( t ) ;
2019-01-04 02:26:27 +03:00
if ( sysctl_hung_task_panic ) {
console_verbose ( ) ;
hung_task_show_lock = true ;
hung_task_call_panic = true ;
}
2014-01-20 21:34:13 +04:00
2009-01-15 22:08:40 +03:00
/*
* Ok , the task did not get scheduled for more than 2 minutes ,
* complain :
*/
2016-10-11 23:55:56 +03:00
if ( sysctl_hung_task_warnings ) {
2016-12-13 03:45:35 +03:00
if ( sysctl_hung_task_warnings > 0 )
sysctl_hung_task_warnings - - ;
2016-10-11 23:55:56 +03:00
pr_err ( " INFO: task %s:%d blocked for more than %ld seconds. \n " ,
2019-03-08 03:26:50 +03:00
t - > comm , t - > pid , ( jiffies - t - > last_switch_time ) / HZ ) ;
2016-10-11 23:55:56 +03:00
pr_err ( " %s %s %.*s \n " ,
print_tainted ( ) , init_utsname ( ) - > release ,
( int ) strcspn ( init_utsname ( ) - > version , " " ) ,
init_utsname ( ) - > version ) ;
pr_err ( " \" echo 0 > /proc/sys/kernel/hung_task_timeout_secs \" "
" disables this message. \n " ) ;
sched_show_task ( t ) ;
2017-05-09 01:55:11 +03:00
hung_task_show_lock = true ;
kernel/hung_task.c: introduce sysctl to print all traces when a hung task is detected
Commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before
panic") introduced a change in that we started to show all CPUs
backtraces when a hung task is detected _and_ the sysctl/kernel
parameter "hung_task_panic" is set. The idea is good, because usually
when observing deadlocks (that may lead to hung tasks), the culprit is
another task holding a lock and not necessarily the task detected as
hung.
The problem with this approach is that dumping backtraces is a slightly
expensive task, specially printing that on console (and specially in
many CPU machines, as servers commonly found nowadays). So, users that
plan to collect a kdump to investigate the hung tasks and narrow down
the deadlock definitely don't need the CPUs backtrace on dmesg/console,
which will delay the panic and pollute the log (crash tool would easily
grab all CPUs traces with 'bt -a' command).
Also, there's the reciprocal scenario: some users may be interested in
seeing the CPUs backtraces but not have the system panic when a hung
task is detected. The current approach hence is almost as embedding a
policy in the kernel, by forcing the CPUs backtraces' dump (only) on
hung_task_panic.
This patch decouples the panic event on hung task from the CPUs
backtraces dump, by creating (and documenting) a new sysctl called
"hung_task_all_cpu_backtrace", analog to the approach taken on soft/hard
lockups, that have both a panic and an "all_cpu_backtrace" sysctl to
allow individual control. The new mechanism for dumping the CPUs
backtraces on hung task detection respects "hung_task_warnings" by not
dumping the traces in case there's no warnings left.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: http://lkml.kernel.org/r/20200327223646.20779-1-gpiccoli@canonical.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-08 07:40:45 +03:00
if ( sysctl_hung_task_all_cpu_backtrace )
hung_task_show_all_bt = true ;
2016-10-11 23:55:56 +03:00
}
2009-01-15 22:08:40 +03:00
touch_nmi_watchdog ( ) ;
}
2009-02-05 07:35:48 +03:00
/*
* To avoid extending the RCU grace period for an unbounded amount of time ,
* periodically exit the critical section and enter a new one .
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
2010-08-05 19:10:54 +04:00
* to exit the grace period . For classic RCU , a reschedule is required .
2009-02-05 07:35:48 +03:00
*/
2012-03-06 02:59:14 +04:00
static bool rcu_lock_break ( struct task_struct * g , struct task_struct * t )
2009-02-05 07:35:48 +03:00
{
2012-03-06 02:59:14 +04:00
bool can_cont ;
2009-02-05 07:35:48 +03:00
get_task_struct ( g ) ;
get_task_struct ( t ) ;
rcu_read_unlock ( ) ;
cond_resched ( ) ;
rcu_read_lock ( ) ;
2012-03-06 02:59:14 +04:00
can_cont = pid_alive ( g ) & & pid_alive ( t ) ;
2009-02-05 07:35:48 +03:00
put_task_struct ( t ) ;
put_task_struct ( g ) ;
2012-03-06 02:59:14 +04:00
return can_cont ;
2009-02-05 07:35:48 +03:00
}
2009-01-15 22:08:40 +03:00
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time ( 120 seconds ) . If that happens , print out
* a warning .
*/
2009-01-17 21:31:48 +03:00
static void check_hung_uninterruptible_tasks ( unsigned long timeout )
2009-01-15 22:08:40 +03:00
{
int max_count = sysctl_hung_task_check_count ;
2019-01-04 02:26:31 +03:00
unsigned long last_break = jiffies ;
2009-01-15 22:08:40 +03:00
struct task_struct * g , * t ;
/*
* If the system crashed already then all bets are off ,
* do not report extra hung tasks :
*/
if ( test_taint ( TAINT_DIE ) | | did_panic )
return ;
2017-05-09 01:55:11 +03:00
hung_task_show_lock = false ;
2009-02-05 20:56:08 +03:00
rcu_read_lock ( ) ;
2015-04-16 02:16:47 +03:00
for_each_process_thread ( g , t ) {
2009-11-27 05:28:20 +03:00
if ( ! max_count - - )
2009-01-15 22:08:40 +03:00
goto unlock ;
2019-01-04 02:26:31 +03:00
if ( time_after ( jiffies , last_break + HUNG_TASK_LOCK_BREAK ) ) {
2012-03-06 02:59:14 +04:00
if ( ! rcu_lock_break ( g , t ) )
2009-02-05 07:35:48 +03:00
goto unlock ;
2019-01-04 02:26:31 +03:00
last_break = jiffies ;
2009-02-05 07:35:48 +03:00
}
2009-01-15 22:08:40 +03:00
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
2021-06-11 11:28:17 +03:00
if ( READ_ONCE ( t - > __state ) = = TASK_UNINTERRUPTIBLE )
2009-02-07 02:37:47 +03:00
check_hung_task ( t , timeout ) ;
2015-04-16 02:16:47 +03:00
}
2009-01-15 22:08:40 +03:00
unlock :
2009-02-05 20:56:08 +03:00
rcu_read_unlock ( ) ;
2022-06-23 17:51:57 +03:00
if ( hung_task_show_lock )
2017-05-09 01:55:11 +03:00
debug_show_all_locks ( ) ;
kernel/hung_task.c: introduce sysctl to print all traces when a hung task is detected
Commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before
panic") introduced a change in that we started to show all CPUs
backtraces when a hung task is detected _and_ the sysctl/kernel
parameter "hung_task_panic" is set. The idea is good, because usually
when observing deadlocks (that may lead to hung tasks), the culprit is
another task holding a lock and not necessarily the task detected as
hung.
The problem with this approach is that dumping backtraces is a slightly
expensive task, specially printing that on console (and specially in
many CPU machines, as servers commonly found nowadays). So, users that
plan to collect a kdump to investigate the hung tasks and narrow down
the deadlock definitely don't need the CPUs backtrace on dmesg/console,
which will delay the panic and pollute the log (crash tool would easily
grab all CPUs traces with 'bt -a' command).
Also, there's the reciprocal scenario: some users may be interested in
seeing the CPUs backtraces but not have the system panic when a hung
task is detected. The current approach hence is almost as embedding a
policy in the kernel, by forcing the CPUs backtraces' dump (only) on
hung_task_panic.
This patch decouples the panic event on hung task from the CPUs
backtraces dump, by creating (and documenting) a new sysctl called
"hung_task_all_cpu_backtrace", analog to the approach taken on soft/hard
lockups, that have both a panic and an "all_cpu_backtrace" sysctl to
allow individual control. The new mechanism for dumping the CPUs
backtraces on hung task detection respects "hung_task_warnings" by not
dumping the traces in case there's no warnings left.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: http://lkml.kernel.org/r/20200327223646.20779-1-gpiccoli@canonical.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-08 07:40:45 +03:00
if ( hung_task_show_all_bt ) {
hung_task_show_all_bt = false ;
kernel/hung_task.c: show all hung tasks before panic
When we get a hung task it can often be valuable to see _all_ the hung
tasks on the system before calling panic().
Quoting from https://syzkaller.appspot.com/text?tag=CrashReport&id=5316056503549952
----------------------------------------
INFO: task syz-executor0:6540 blocked for more than 120 seconds.
Not tainted 4.16.0+ #13
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor0 D23560 6540 4521 0x80000004
Call Trace:
context_switch kernel/sched/core.c:2848 [inline]
__schedule+0x8fb/0x1ef0 kernel/sched/core.c:3490
schedule+0xf5/0x430 kernel/sched/core.c:3549
schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3607
__mutex_lock_common kernel/locking/mutex.c:833 [inline]
__mutex_lock+0xb7f/0x1810 kernel/locking/mutex.c:893
mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355
__blkdev_driver_ioctl block/ioctl.c:303 [inline]
blkdev_ioctl+0x1759/0x1e00 block/ioctl.c:601
ioctl_by_bdev+0xa5/0x110 fs/block_dev.c:2060
isofs_get_last_session fs/isofs/inode.c:567 [inline]
isofs_fill_super+0x2ba9/0x3bc0 fs/isofs/inode.c:660
mount_bdev+0x2b7/0x370 fs/super.c:1119
isofs_mount+0x34/0x40 fs/isofs/inode.c:1560
mount_fs+0x66/0x2d0 fs/super.c:1222
vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
vfs_kern_mount fs/namespace.c:2514 [inline]
do_new_mount fs/namespace.c:2517 [inline]
do_mount+0xea4/0x2b90 fs/namespace.c:2847
ksys_mount+0xab/0x120 fs/namespace.c:3063
SYSC_mount fs/namespace.c:3077 [inline]
SyS_mount+0x39/0x50 fs/namespace.c:3074
do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
entry_SYSCALL_64_after_hwframe+0x42/0xb7
(...snipped...)
Showing all locks held in the system:
(...snipped...)
2 locks held by syz-executor0/6540:
#0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: alloc_super fs/super.c:211 [inline]
#0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: sget_userns+0x3b2/0xe60 fs/super.c:502 /* down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); */
#1: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */
(...snipped...)
3 locks held by syz-executor7/6541:
#0: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */
#1: 000000007bf3d3f9 (&bdev->bd_mutex){+.+.}, at: blkdev_reread_part+0x1e/0x40 block/ioctl.c:192
#2: 00000000566d4c39 (&type->s_umount_key#50){.+.+}, at: __get_super.part.10+0x1d3/0x280 fs/super.c:663 /* down_read(&sb->s_umount); */
----------------------------------------
When reporting an AB-BA deadlock like shown above, it would be nice if
trace of PID=6541 is printed as well as trace of PID=6540 before calling
panic().
Showing hung tasks up to /proc/sys/kernel/hung_task_warnings could delay
calling panic() but normally there should not be so many hung tasks.
Link: http://lkml.kernel.org/r/201804050705.BHE57833.HVFOFtSOMQJFOL@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-06-08 03:10:34 +03:00
trigger_all_cpu_backtrace ( ) ;
}
kernel/hung_task.c: introduce sysctl to print all traces when a hung task is detected
Commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before
panic") introduced a change in that we started to show all CPUs
backtraces when a hung task is detected _and_ the sysctl/kernel
parameter "hung_task_panic" is set. The idea is good, because usually
when observing deadlocks (that may lead to hung tasks), the culprit is
another task holding a lock and not necessarily the task detected as
hung.
The problem with this approach is that dumping backtraces is a slightly
expensive task, specially printing that on console (and specially in
many CPU machines, as servers commonly found nowadays). So, users that
plan to collect a kdump to investigate the hung tasks and narrow down
the deadlock definitely don't need the CPUs backtrace on dmesg/console,
which will delay the panic and pollute the log (crash tool would easily
grab all CPUs traces with 'bt -a' command).
Also, there's the reciprocal scenario: some users may be interested in
seeing the CPUs backtraces but not have the system panic when a hung
task is detected. The current approach hence is almost as embedding a
policy in the kernel, by forcing the CPUs backtraces' dump (only) on
hung_task_panic.
This patch decouples the panic event on hung task from the CPUs
backtraces dump, by creating (and documenting) a new sysctl called
"hung_task_all_cpu_backtrace", analog to the approach taken on soft/hard
lockups, that have both a panic and an "all_cpu_backtrace" sysctl to
allow individual control. The new mechanism for dumping the CPUs
backtraces on hung task detection respects "hung_task_warnings" by not
dumping the traces in case there's no warnings left.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: http://lkml.kernel.org/r/20200327223646.20779-1-gpiccoli@canonical.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-08 07:40:45 +03:00
if ( hung_task_call_panic )
panic ( " hung_task: blocked tasks " ) ;
2009-01-15 22:08:40 +03:00
}
2016-03-23 00:24:39 +03:00
static long hung_timeout_jiffies ( unsigned long last_checked ,
unsigned long timeout )
2009-01-15 22:08:40 +03:00
{
/* timeout of 0 will disable the watchdog */
2016-03-23 00:24:39 +03:00
return timeout ? last_checked - jiffies + timeout * HZ :
MAX_SCHEDULE_TIMEOUT ;
2009-01-15 22:08:40 +03:00
}
2022-01-22 09:11:00 +03:00
# ifdef CONFIG_SYSCTL
2009-01-15 22:08:40 +03:00
/*
* Process updating of timeout sysctl
*/
2022-01-22 09:11:00 +03:00
static int proc_dohung_task_timeout_secs ( struct ctl_table * table , int write ,
void __user * buffer ,
size_t * lenp , loff_t * ppos )
2009-01-15 22:08:40 +03:00
{
int ret ;
2009-09-24 02:57:19 +04:00
ret = proc_doulongvec_minmax ( table , write , buffer , lenp , ppos ) ;
2009-01-15 22:08:40 +03:00
if ( ret | | ! write )
goto out ;
wake_up_process ( watchdog_task ) ;
out :
return ret ;
}
2022-01-22 09:11:00 +03:00
/*
* This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
* and hung_task_check_interval_secs
*/
static const unsigned long hung_task_timeout_max = ( LONG_MAX / HZ ) ;
static struct ctl_table hung_task_sysctls [ ] = {
# ifdef CONFIG_SMP
{
. procname = " hung_task_all_cpu_backtrace " ,
. data = & sysctl_hung_task_all_cpu_backtrace ,
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
. proc_handler = proc_dointvec_minmax ,
. extra1 = SYSCTL_ZERO ,
. extra2 = SYSCTL_ONE ,
} ,
# endif /* CONFIG_SMP */
{
. procname = " hung_task_panic " ,
. data = & sysctl_hung_task_panic ,
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
. proc_handler = proc_dointvec_minmax ,
. extra1 = SYSCTL_ZERO ,
. extra2 = SYSCTL_ONE ,
} ,
{
. procname = " hung_task_check_count " ,
. data = & sysctl_hung_task_check_count ,
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
. proc_handler = proc_dointvec_minmax ,
. extra1 = SYSCTL_ZERO ,
} ,
{
. procname = " hung_task_timeout_secs " ,
. data = & sysctl_hung_task_timeout_secs ,
. maxlen = sizeof ( unsigned long ) ,
. mode = 0644 ,
. proc_handler = proc_dohung_task_timeout_secs ,
. extra2 = ( void * ) & hung_task_timeout_max ,
} ,
{
. procname = " hung_task_check_interval_secs " ,
. data = & sysctl_hung_task_check_interval_secs ,
. maxlen = sizeof ( unsigned long ) ,
. mode = 0644 ,
. proc_handler = proc_dohung_task_timeout_secs ,
. extra2 = ( void * ) & hung_task_timeout_max ,
} ,
{
. procname = " hung_task_warnings " ,
. data = & sysctl_hung_task_warnings ,
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
. proc_handler = proc_dointvec_minmax ,
. extra1 = SYSCTL_NEG_ONE ,
} ,
{ }
} ;
static void __init hung_task_sysctl_init ( void )
{
register_sysctl_init ( " kernel " , hung_task_sysctls ) ;
}
# else
# define hung_task_sysctl_init() do { } while (0)
# endif /* CONFIG_SYSCTL */
2013-10-12 04:39:26 +04:00
static atomic_t reset_hung_task = ATOMIC_INIT ( 0 ) ;
void reset_hung_task_detector ( void )
{
atomic_set ( & reset_hung_task , 1 ) ;
}
EXPORT_SYMBOL_GPL ( reset_hung_task_detector ) ;
2018-10-17 14:23:55 +03:00
static bool hung_detector_suspended ;
static int hungtask_pm_notify ( struct notifier_block * self ,
unsigned long action , void * hcpu )
{
switch ( action ) {
case PM_SUSPEND_PREPARE :
case PM_HIBERNATION_PREPARE :
case PM_RESTORE_PREPARE :
hung_detector_suspended = true ;
break ;
case PM_POST_SUSPEND :
case PM_POST_HIBERNATION :
case PM_POST_RESTORE :
hung_detector_suspended = false ;
break ;
default :
break ;
}
return NOTIFY_OK ;
}
2009-01-15 22:08:40 +03:00
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog ( void * dummy )
{
2016-03-23 00:24:39 +03:00
unsigned long hung_last_checked = jiffies ;
2009-01-15 22:08:40 +03:00
set_user_nice ( current , 0 ) ;
for ( ; ; ) {
2009-02-07 02:37:47 +03:00
unsigned long timeout = sysctl_hung_task_timeout_secs ;
2018-08-22 07:55:52 +03:00
unsigned long interval = sysctl_hung_task_check_interval_secs ;
long t ;
2009-01-17 21:31:48 +03:00
2018-08-22 07:55:52 +03:00
if ( interval = = 0 )
interval = timeout ;
interval = min_t ( unsigned long , interval , timeout ) ;
t = hung_timeout_jiffies ( hung_last_checked , interval ) ;
2016-03-23 00:24:39 +03:00
if ( t < = 0 ) {
2018-10-17 14:23:55 +03:00
if ( ! atomic_xchg ( & reset_hung_task , 0 ) & &
! hung_detector_suspended )
2016-03-23 00:24:39 +03:00
check_hung_uninterruptible_tasks ( timeout ) ;
hung_last_checked = jiffies ;
2013-10-12 04:39:26 +04:00
continue ;
2016-03-23 00:24:39 +03:00
}
schedule_timeout_interruptible ( t ) ;
2009-01-15 22:08:40 +03:00
}
return 0 ;
}
static int __init hung_task_init ( void )
{
atomic_notifier_chain_register ( & panic_notifier_list , & panic_block ) ;
2018-10-17 14:23:55 +03:00
/* Disable hung task detector on suspend */
pm_notifier ( hungtask_pm_notify , 0 ) ;
2009-01-15 22:08:40 +03:00
watchdog_task = kthread_run ( watchdog , NULL , " khungtaskd " ) ;
2022-01-22 09:11:00 +03:00
hung_task_sysctl_init ( ) ;
2009-01-15 22:08:40 +03:00
return 0 ;
}
2014-04-04 01:48:35 +04:00
subsys_initcall ( hung_task_init ) ;