2010-12-28 14:25:21 -08:00
/*
* Persistent Storage - platform driver interface parts .
*
2012-05-26 06:20:19 -07:00
* Copyright ( C ) 2007 - 2008 Google , Inc .
2010-12-28 14:25:21 -08:00
* Copyright ( C ) 2010 Intel Corporation < tony . luck @ intel . com >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
2014-06-06 14:37:31 -07:00
# define pr_fmt(fmt) "pstore: " fmt
2010-12-28 14:25:21 -08:00
# include <linux/atomic.h>
# include <linux/types.h>
# include <linux/errno.h>
# include <linux/init.h>
# include <linux/kmsg_dump.h>
2012-05-26 06:20:19 -07:00
# include <linux/console.h>
2010-12-28 14:25:21 -08:00
# include <linux/module.h>
# include <linux/pstore.h>
2013-08-16 13:53:10 -07:00
# include <linux/zlib.h>
2010-12-28 14:25:21 -08:00
# include <linux/string.h>
2011-08-11 15:14:39 -07:00
# include <linux/timer.h>
2010-12-28 14:25:21 -08:00
# include <linux/slab.h>
# include <linux/uaccess.h>
2011-08-12 10:54:51 -07:00
# include <linux/hardirq.h>
2012-05-26 06:20:28 -07:00
# include <linux/jiffies.h>
2011-08-11 15:14:39 -07:00
# include <linux/workqueue.h>
2010-12-28 14:25:21 -08:00
# include "internal.h"
2011-08-11 15:14:39 -07:00
/*
* We defer making " oops " entries appear in pstore - see
* whether the system is actually still running well enough
* to let someone see the entry
*/
pstore/platform: Disable automatic updates by default
Having automatic updates seems pointless for production system, and
even dangerous and thus counter-productive:
1. If we can mount pstore, or read files, we can as well read
/proc/kmsg. So, there's little point in duplicating the
functionality and present the same information but via another
userland ABI;
2. Expecting the kernel to behave sanely after oops/panic is naive.
It might work, but you'd rather not try it. Screwed up kernel
can do rather bad things, like recursive faults[1]; and pstore
is rather prone to provoking bad things. It uses:
1. Timers (assumes sane interrupts state);
2. Workqueues and mutexes (assumes scheduler in a sane state);
3. kzalloc (a working slab allocator);
That's too much for a dead kernel, so the debugging facility
itself might just make debugging harder, which is not what
we want.
Maybe for non-oops message types it would make sense to re-enable
automatic updates, but so far I don't see any use case for this.
Even for tracing, it has its own run-time/normal ABI, so we're
only interested in pstore upon next boot, to retrieve what has
gone wrong with HW or SW.
So, let's disable the updates by default.
[1]
BUG: unable to handle kernel paging request at fffffffffffffff8
IP: [<ffffffff8104801b>] kthread_data+0xb/0x20
[...]
Process kworker/0:1 (pid: 14, threadinfo ffff8800072c0000, task ffff88000725b100)
[...
Call Trace:
[<ffffffff81043710>] wq_worker_sleeping+0x10/0xa0
[<ffffffff813687a8>] __schedule+0x568/0x7d0
[<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10
[<ffffffff81087e22>] ? call_rcu_sched+0x12/0x20
[<ffffffff8102b596>] ? release_task+0x156/0x2d0
[<ffffffff8102b45e>] ? release_task+0x1e/0x2d0
[<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10
[<ffffffff81368ac4>] schedule+0x24/0x70
[<ffffffff8102cba8>] do_exit+0x1f8/0x370
[<ffffffff810051e7>] oops_end+0x77/0xb0
[<ffffffff8135c301>] no_context+0x1a6/0x1b5
[<ffffffff8135c4de>] __bad_area_nosemaphore+0x1ce/0x1ed
[<ffffffff81053156>] ? ttwu_queue+0xc6/0xe0
[<ffffffff8135c50b>] bad_area_nosemaphore+0xe/0x10
[<ffffffff8101fa47>] do_page_fault+0x2c7/0x450
[<ffffffff8106e34b>] ? __lock_release+0x6b/0xe0
[<ffffffff8106bf21>] ? mark_held_locks+0x61/0x140
[<ffffffff810502fe>] ? __wake_up+0x4e/0x70
[<ffffffff81185f7d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
[<ffffffff81158970>] ? pstore_register+0x120/0x120
[<ffffffff8136a37f>] page_fault+0x1f/0x30
[<ffffffff81158970>] ? pstore_register+0x120/0x120
[<ffffffff81185ab8>] ? memcpy+0x68/0x110
[<ffffffff8115875a>] ? pstore_get_records+0x3a/0x130
[<ffffffff811590f4>] ? persistent_ram_copy_old+0x64/0x90
[<ffffffff81158bf4>] ramoops_pstore_read+0x84/0x130
[<ffffffff81158799>] pstore_get_records+0x79/0x130
[<ffffffff81042536>] ? process_one_work+0x116/0x450
[<ffffffff81158970>] ? pstore_register+0x120/0x120
[<ffffffff8115897e>] pstore_dowork+0xe/0x10
[<ffffffff81042594>] process_one_work+0x174/0x450
[<ffffffff81042536>] ? process_one_work+0x116/0x450
[<ffffffff81042e13>] worker_thread+0x123/0x2d0
[<ffffffff81042cf0>] ? manage_workers.isra.28+0x120/0x120
[<ffffffff81047d8e>] kthread+0x8e/0xa0
[<ffffffff8136ba74>] kernel_thread_helper+0x4/0x10
[<ffffffff8136a199>] ? retint_restore_args+0xe/0xe
[<ffffffff81047d00>] ? __init_kthread_worker+0x70/0x70
[<ffffffff8136ba70>] ? gs_change+0xb/0xb
Code: be e2 00 00 00 48 c7 c7 d1 2a 4e 81 e8 bf fb fd ff 48 8b 5d f0 4c 8b 65 f8 c9 c3 0f 1f 44 00 00 48 8b 87 08 02 00 00 55 48 89 e5 <48> 8b 40 f8 5d c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
RIP [<ffffffff8104801b>] kthread_data+0xb/0x20
RSP <ffff8800072c1888>
CR2: fffffffffffffff8
---[ end trace 996a332dc399111d ]---
Fixing recursive fault but reboot is needed!
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2012-05-26 06:20:29 -07:00
/*
 * Interval, in milliseconds, used by pstore_register() to arm pstore_timer.
 * The timer is only armed when this value is >= 0; the default of -1 leaves
 * runtime updates disabled.
 */
static int pstore_update_ms = - 1 ;
2012-05-26 06:20:28 -07:00
module_param_named ( update_ms , pstore_update_ms , int , 0600 ) ;
MODULE_PARM_DESC ( update_ms , " milliseconds before pstore updates its content "
pstore/platform: Disable automatic updates by default
Having automatic updates seems pointless for production system, and
even dangerous and thus counter-productive:
1. If we can mount pstore, or read files, we can as well read
/proc/kmsg. So, there's little point in duplicating the
functionality and present the same information but via another
userland ABI;
2. Expecting the kernel to behave sanely after oops/panic is naive.
It might work, but you'd rather not try it. Screwed up kernel
can do rather bad things, like recursive faults[1]; and pstore
rather provoking bad things to happen. It uses:
1. Timers (assumes sane interrupts state);
2. Workqueues and mutexes (assumes scheduler in a sane state);
3. kzalloc (a working slab allocator);
That's too much for a dead kernel, so the debugging facility
itself might just make debugging harder, which is not what
we want.
Maybe for non-oops message types it would make sense to re-enable
automatic updates, but so far I don't see any use case for this.
Even for tracing, it has its own run-time/normal ABI, so we're
only interested in pstore upon next boot, to retrieve what has
gone wrong with HW or SW.
So, let's disable the updates by default.
[1]
BUG: unable to handle kernel paging request at fffffffffffffff8
IP: [<ffffffff8104801b>] kthread_data+0xb/0x20
[...]
Process kworker/0:1 (pid: 14, threadinfo ffff8800072c0000, task ffff88000725b100)
[...
Call Trace:
[<ffffffff81043710>] wq_worker_sleeping+0x10/0xa0
[<ffffffff813687a8>] __schedule+0x568/0x7d0
[<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10
[<ffffffff81087e22>] ? call_rcu_sched+0x12/0x20
[<ffffffff8102b596>] ? release_task+0x156/0x2d0
[<ffffffff8102b45e>] ? release_task+0x1e/0x2d0
[<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10
[<ffffffff81368ac4>] schedule+0x24/0x70
[<ffffffff8102cba8>] do_exit+0x1f8/0x370
[<ffffffff810051e7>] oops_end+0x77/0xb0
[<ffffffff8135c301>] no_context+0x1a6/0x1b5
[<ffffffff8135c4de>] __bad_area_nosemaphore+0x1ce/0x1ed
[<ffffffff81053156>] ? ttwu_queue+0xc6/0xe0
[<ffffffff8135c50b>] bad_area_nosemaphore+0xe/0x10
[<ffffffff8101fa47>] do_page_fault+0x2c7/0x450
[<ffffffff8106e34b>] ? __lock_release+0x6b/0xe0
[<ffffffff8106bf21>] ? mark_held_locks+0x61/0x140
[<ffffffff810502fe>] ? __wake_up+0x4e/0x70
[<ffffffff81185f7d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
[<ffffffff81158970>] ? pstore_register+0x120/0x120
[<ffffffff8136a37f>] page_fault+0x1f/0x30
[<ffffffff81158970>] ? pstore_register+0x120/0x120
[<ffffffff81185ab8>] ? memcpy+0x68/0x110
[<ffffffff8115875a>] ? pstore_get_records+0x3a/0x130
[<ffffffff811590f4>] ? persistent_ram_copy_old+0x64/0x90
[<ffffffff81158bf4>] ramoops_pstore_read+0x84/0x130
[<ffffffff81158799>] pstore_get_records+0x79/0x130
[<ffffffff81042536>] ? process_one_work+0x116/0x450
[<ffffffff81158970>] ? pstore_register+0x120/0x120
[<ffffffff8115897e>] pstore_dowork+0xe/0x10
[<ffffffff81042594>] process_one_work+0x174/0x450
[<ffffffff81042536>] ? process_one_work+0x116/0x450
[<ffffffff81042e13>] worker_thread+0x123/0x2d0
[<ffffffff81042cf0>] ? manage_workers.isra.28+0x120/0x120
[<ffffffff81047d8e>] kthread+0x8e/0xa0
[<ffffffff8136ba74>] kernel_thread_helper+0x4/0x10
[<ffffffff8136a199>] ? retint_restore_args+0xe/0xe
[<ffffffff81047d00>] ? __init_kthread_worker+0x70/0x70
[<ffffffff8136ba70>] ? gs_change+0xb/0xb
Code: be e2 00 00 00 48 c7 c7 d1 2a 4e 81 e8 bf fb fd ff 48 8b 5d f0 4c 8b 65 f8 c9 c3 0f 1f 44 00 00 48 8b 87 08 02 00 00 55 48 89 e5 <48> 8b 40 f8 5d c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
RIP [<ffffffff8104801b>] kthread_data+0xb/0x20
RSP <ffff8800072c1888>
CR2: fffffffffffffff8
---[ end trace 996a332dc399111d ]---
Fixing recursive fault but reboot is needed!
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2012-05-26 06:20:29 -07:00
" (default is -1, which means runtime updates are disabled; "
" enabling this option is not safe, it may lead to further "
" corruption on Oopses) " ) ;
2011-08-11 15:14:39 -07:00
/*
 * Set to 1 by pstore_dump() after a successful KMSG_DUMP_OOPS write while
 * pstore is mounted.
 */
static int pstore_new_entry ;
/* Timer armed by pstore_register() when pstore_update_ms is >= 0. */
static void pstore_timefunc ( unsigned long ) ;
static DEFINE_TIMER ( pstore_timer , pstore_timefunc , 0 , 0 ) ;
/* Work item whose handler is pstore_dowork(). */
static void pstore_dowork ( struct work_struct * ) ;
static DECLARE_WORK ( pstore_work , pstore_dowork ) ;
2010-12-28 14:25:21 -08:00
/*
* pstore_lock just protects " psinfo " during
* calls to pstore_register ( )
*/
static DEFINE_SPINLOCK ( pstore_lock ) ;
2012-07-09 17:10:41 -07:00
struct pstore_info * psinfo ;
2010-12-28 14:25:21 -08:00
2011-07-21 16:57:55 -04:00
static char * backend ;
2013-08-16 13:53:10 -07:00
/*
 * Compression parameters: COMPR_LEVEL and MEM_LEVEL are passed to
 * zlib_deflateInit2(); WINDOW_BITS is passed to both zlib_deflateInit2()
 * and zlib_inflateInit2().
 */
# define COMPR_LEVEL 6
# define WINDOW_BITS 12
# define MEM_LEVEL 4
/* Single zlib stream shared by pstore_compress() and pstore_decompress(). */
static struct z_stream_s stream ;
/*
 * Staging buffer (and its size) for uncompressed dump text; allocated by
 * allocate_buf_for_compression() and left NULL when compression is unavailable.
 */
static char * big_oops_buf ;
static size_t big_oops_buf_sz ;
2011-03-18 15:33:43 -07:00
/* How much of the console log to snapshot */
2010-12-28 14:25:21 -08:00
static unsigned long kmsg_bytes = 10240 ;
2011-03-18 15:33:43 -07:00
/*
 * Set kmsg_bytes, the byte budget that bounds the dump loop in pstore_dump().
 * NOTE(review): bytes is a signed int stored into an unsigned long, so a
 * negative argument would become a very large limit - confirm callers
 * never pass one.
 */
void pstore_set_kmsg_bytes ( int bytes )
2010-12-28 14:25:21 -08:00
{
2011-03-18 15:33:43 -07:00
kmsg_bytes = bytes ;
2010-12-28 14:25:21 -08:00
}
/* Tag each group of saved records with a sequence number */
static int oopscount ;
2012-03-16 15:36:59 -07:00
/*
 * Map a kmsg dump reason to the label used in the header of each saved
 * record. Reasons without a dedicated label map to the "Unknown" string.
 */
static const char *get_reason_str(enum kmsg_dump_reason reason)
{
	const char *label = " Unknown ";

	switch (reason) {
	case KMSG_DUMP_PANIC:
		label = " Panic ";
		break;
	case KMSG_DUMP_OOPS:
		label = " Oops ";
		break;
	case KMSG_DUMP_EMERG:
		label = " Emergency ";
		break;
	case KMSG_DUMP_RESTART:
		label = " Restart ";
		break;
	case KMSG_DUMP_HALT:
		label = " Halt ";
		break;
	case KMSG_DUMP_POWEROFF:
		label = " Poweroff ";
		break;
	default:
		break;
	}

	return label;
}
2011-03-22 16:01:49 -07:00
pstore: Avoid deadlock in panic and emergency-restart path
[Issue]
When pstore is in panic and emergency-restart paths, it may be blocked
in those paths because it simply takes spin_lock.
This is an example scenario which pstore may hang up in a panic path:
- cpuA grabs psinfo->buf_lock
- cpuB panics and calls smp_send_stop
- smp_send_stop sends IRQ to cpuA
- after 1 second, cpuB gives up on cpuA and sends an NMI instead
- cpuA is now in an NMI handler while still holding buf_lock
- cpuB is deadlocked
This case may happen if a firmware has a bug and
cpuA is stuck talking with it more than one second.
Also, this is a similar scenario in an emergency-restart path:
- cpuA grabs psinfo->buf_lock and gets stuck in firmware
- cpuB kicks emergency-restart via either sysrq-b or hangcheck timer.
And then, cpuB is deadlocked by taking psinfo->buf_lock again.
[Solution]
This patch avoids the deadlocking issues in both panic and emergency_restart
paths by introducing a function, is_non_blocking_path(), to check if a cpu
can be blocked in current path.
With this patch, pstore is not blocked even if another cpu has
taken a spin_lock, in those paths by changing from spin_lock_irqsave
to spin_trylock_irqsave.
In addition, according to a comment of emergency_restart() in kernel/sys.c,
spin_lock shouldn't be taken in an emergency_restart path to avoid
deadlock. This patch fits the comment below.
<snip>
/**
* emergency_restart - reboot the system
*
* Without shutting down any hardware or taking any locks
* reboot the system. This is called when we know we are in
* trouble so this is our best effort to reboot. This is
* safe to call in interrupt context.
*/
void emergency_restart(void)
<snip>
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2013-01-11 18:09:41 +00:00
/*
 * Tell the caller whether the current dump path must not block on a lock.
 *
 * Returns true when running in NMI context (whatever the reason), and for
 * the panic and emergency-restart reasons; false for every other reason.
 */
bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
{
	/* In NMI context pstore must never be blocked. */
	if (in_nmi())
		return true;

	/*
	 * Panic: other cpus are stopped by smp_send_stop().
	 * Emergency restart: shouldn't be blocked by a spin lock.
	 */
	return reason == KMSG_DUMP_PANIC || reason == KMSG_DUMP_EMERG;
}
EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
2013-08-16 13:53:10 -07:00
/* Derived from logfs_compress() */
static int pstore_compress ( const void * in , void * out , size_t inlen ,
size_t outlen )
{
int err , ret ;
ret = - EIO ;
err = zlib_deflateInit2 ( & stream , COMPR_LEVEL , Z_DEFLATED , WINDOW_BITS ,
MEM_LEVEL , Z_DEFAULT_STRATEGY ) ;
if ( err ! = Z_OK )
goto error ;
stream . next_in = in ;
stream . avail_in = inlen ;
stream . total_in = 0 ;
stream . next_out = out ;
stream . avail_out = outlen ;
stream . total_out = 0 ;
err = zlib_deflate ( & stream , Z_FINISH ) ;
if ( err ! = Z_STREAM_END )
goto error ;
err = zlib_deflateEnd ( & stream ) ;
if ( err ! = Z_OK )
goto error ;
if ( stream . total_out > = stream . total_in )
goto error ;
ret = stream . total_out ;
error :
return ret ;
}
2013-08-16 13:53:28 -07:00
/*
 * Inflate inlen bytes from in into out (capacity outlen) using the
 * file-level zlib stream. Derived from logfs_uncompress.
 *
 * Returns the number of bytes produced (stream.total_out) on success,
 * or -EIO if initialisation, inflation or teardown of the stream fails.
 */
static int pstore_decompress ( void * in , void * out , size_t inlen , size_t outlen )
{
int err , ret ;
/* Assume failure; ret is only overwritten once every zlib step succeeded. */
ret = - EIO ;
2013-09-11 10:58:03 -07:00
err = zlib_inflateInit2 ( & stream , WINDOW_BITS ) ;
2013-08-16 13:53:28 -07:00
if ( err ! = Z_OK )
goto error ;
stream . next_in = in ;
stream . avail_in = inlen ;
stream . total_in = 0 ;
stream . next_out = out ;
stream . avail_out = outlen ;
stream . total_out = 0 ;
/* The whole input must inflate in one Z_FINISH pass. */
err = zlib_inflate ( & stream , Z_FINISH ) ;
if ( err ! = Z_STREAM_END )
goto error ;
err = zlib_inflateEnd ( & stream ) ;
if ( err ! = Z_OK )
goto error ;
ret = stream . total_out ;
error :
return ret ;
}
2013-08-16 13:53:10 -07:00
/*
 * Allocate big_oops_buf (staging area for uncompressed dump text) and the
 * zlib workspace shared by deflate and inflate. If either allocation fails,
 * big_oops_buf and stream.workspace are both left NULL and an error is logged;
 * pstore_dump() then takes its uncompressed path.
 */
static void allocate_buf_for_compression ( void )
{
size_t size ;
2013-09-11 10:57:41 -07:00
size_t cmpr ;
/*
 * Pick cmpr from the backend buffer size. It is used below as a
 * percentage divisor: big_oops_buf_sz = bufsize * 100 / cmpr.
 * NOTE(review): presumably the expected compressed size as a percentage
 * of the input - confirm against the commit that introduced the table.
 */
switch ( psinfo - > bufsize ) {
/* buffer range for efivars */
case 1000 . . . 2000 :
cmpr = 56 ;
break ;
case 2001 . . . 3000 :
cmpr = 54 ;
break ;
case 3001 . . . 3999 :
cmpr = 52 ;
break ;
/* buffer range for nvram, erst */
case 4000 . . . 10000 :
cmpr = 45 ;
break ;
default :
cmpr = 60 ;
break ;
}
2013-08-16 13:53:10 -07:00
2013-09-11 10:57:41 -07:00
big_oops_buf_sz = ( psinfo - > bufsize * 100 ) / cmpr ;
2013-08-16 13:53:10 -07:00
big_oops_buf = kmalloc ( big_oops_buf_sz , GFP_KERNEL ) ;
if ( big_oops_buf ) {
/* One workspace serves both directions, so size it for the larger one. */
size = max ( zlib_deflate_workspacesize ( WINDOW_BITS , MEM_LEVEL ) ,
zlib_inflate_workspacesize ( ) ) ;
stream . workspace = kmalloc ( size , GFP_KERNEL ) ;
if ( ! stream . workspace ) {
2014-06-06 14:37:31 -07:00
pr_err ( " No memory for compression workspace; skipping compression \n " ) ;
2013-08-16 13:53:10 -07:00
/* Without a workspace the staging buffer is useless; release it. */
kfree ( big_oops_buf ) ;
big_oops_buf = NULL ;
}
} else {
2014-06-06 14:37:31 -07:00
pr_err ( " No memory for uncompressed data; skipping compression \n " ) ;
2013-08-16 13:53:10 -07:00
stream . workspace = NULL ;
}
}
/*
* Called when compression fails , since the printk buffer
* would be fetched for compression calling it again when
* compression fails would have moved the iterator of
* printk buffer which results in fetching old contents .
* Copy the recent messages from big_oops_buf to psinfo - > buf
*/
static size_t copy_kmsg_to_buffer ( int hsize , size_t len )
{
size_t total_len ;
size_t diff ;
total_len = hsize + len ;
if ( total_len > psinfo - > bufsize ) {
diff = total_len - psinfo - > bufsize + hsize ;
memcpy ( psinfo - > buf , big_oops_buf , hsize ) ;
memcpy ( psinfo - > buf + hsize , big_oops_buf + diff ,
psinfo - > bufsize - hsize ) ;
total_len = psinfo - > bufsize ;
} else
memcpy ( psinfo - > buf , big_oops_buf , total_len ) ;
return total_len ;
}
2010-12-28 14:25:21 -08:00
/*
 * kmsg_dump callback (installed through pstore_dumper.dump).
 *
 * Fetches kernel log text with kmsg_dump_get_buffer() and hands it to the
 * backend's write() in numbered parts, each starting with a
 * "<reason>#<oopscount> Part<n>" header line. The loop ends once kmsg_bytes
 * have been saved or kmsg_dump_get_buffer() returns no more text.
 *
 * When big_oops_buf is available, each part is staged there and compressed
 * into psinfo->buf; if compression fails the part is copied uncompressed
 * via copy_kmsg_to_buffer(). Without big_oops_buf the text is fetched
 * straight into psinfo->buf.
 *
 * psinfo->buf_lock is taken for the duration of the dump. On paths that
 * must not block (see pstore_cannot_block_path()) only a trylock is
 * attempted; if it fails an error is logged and the dump proceeds without
 * holding the lock.
 */
static void pstore_dump ( struct kmsg_dumper * dumper ,
2012-06-15 14:07:51 +02:00
enum kmsg_dump_reason reason )
2010-12-28 14:25:21 -08:00
{
2012-06-15 14:07:51 +02:00
unsigned long total = 0 ;
2012-03-16 15:36:59 -07:00
const char * why ;
2010-12-28 14:25:21 -08:00
u64 id ;
2011-07-21 16:57:54 -04:00
unsigned int part = 1 ;
2011-08-12 10:54:51 -07:00
unsigned long flags = 0 ;
int is_locked = 0 ;
2012-06-15 14:07:51 +02:00
int ret ;
2010-12-28 14:25:21 -08:00
2012-03-16 15:36:59 -07:00
why = get_reason_str ( reason ) ;
2011-03-22 16:01:49 -07:00
pstore: Avoid deadlock in panic and emergency-restart path
[Issue]
When pstore is in panic and emergency-restart paths, it may be blocked
in those paths because it simply takes spin_lock.
This is an example scenario which pstore may hang up in a panic path:
- cpuA grabs psinfo->buf_lock
- cpuB panics and calls smp_send_stop
- smp_send_stop sends IRQ to cpuA
- after 1 second, cpuB gives up on cpuA and sends an NMI instead
- cpuA is now in an NMI handler while still holding buf_lock
- cpuB is deadlocked
This case may happen if a firmware has a bug and
cpuA is stuck talking with it more than one second.
Also, this is a similar scenario in an emergency-restart path:
- cpuA grabs psinfo->buf_lock and stucks in a firmware
- cpuB kicks emergency-restart via either sysrq-b or hangcheck timer.
And then, cpuB is deadlocked by taking psinfo->buf_lock again.
[Solution]
This patch avoids the deadlocking issues in both panic and emergency_restart
paths by introducing a function, is_non_blocking_path(), to check if a cpu
can be blocked in current path.
With this patch, pstore is not blocked even if another cpu has
taken a spin_lock, in those paths by changing from spin_lock_irqsave
to spin_trylock_irqsave.
In addition, according to a comment of emergency_restart() in kernel/sys.c,
spin_lock shouldn't be taken in an emergency_restart path to avoid
deadlock. This patch fits the comment below.
<snip>
/**
* emergency_restart - reboot the system
*
* Without shutting down any hardware or taking any locks
* reboot the system. This is called when we know we are in
* trouble so this is our best effort to reboot. This is
* safe to call in interrupt context.
*/
void emergency_restart(void)
<snip>
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2013-01-11 18:09:41 +00:00
/* Non-blocking paths only try the lock; is_locked records whether we got it. */
if ( pstore_cannot_block_path ( reason ) ) {
is_locked = spin_trylock_irqsave ( & psinfo - > buf_lock , flags ) ;
if ( ! is_locked ) {
pr_err ( " pstore dump routine blocked in %s path, may corrupt error record \n "
, in_nmi ( ) ? " NMI " : why ) ;
}
2011-08-12 10:54:51 -07:00
} else
spin_lock_irqsave ( & psinfo - > buf_lock , flags ) ;
2010-12-28 14:25:21 -08:00
oopscount + + ;
/* One iteration per record part written to the backend. */
while ( total < kmsg_bytes ) {
2012-06-15 14:07:51 +02:00
char * dst ;
unsigned long size ;
int hsize ;
2013-08-16 13:53:10 -07:00
int zipped_len = - 1 ;
2012-06-15 14:07:51 +02:00
size_t len ;
2013-08-16 13:53:10 -07:00
bool compressed ;
size_t total_len ;
2012-06-15 14:07:51 +02:00
2013-08-16 13:53:10 -07:00
if ( big_oops_buf ) {
/* Compression path: stage header + text in big_oops_buf. */
dst = big_oops_buf ;
hsize = sprintf ( dst , " %s#%d Part%d \n " , why ,
oopscount , part ) ;
size = big_oops_buf_sz - hsize ;
2010-12-28 14:25:21 -08:00
2013-08-16 13:53:10 -07:00
if ( ! kmsg_dump_get_buffer ( dumper , true , dst + hsize ,
size , & len ) )
break ;
zipped_len = pstore_compress ( dst , psinfo - > buf ,
hsize + len , psinfo - > bufsize ) ;
if ( zipped_len > 0 ) {
compressed = true ;
total_len = zipped_len ;
} else {
/* Compression failed: store what fits, uncompressed. */
compressed = false ;
total_len = copy_kmsg_to_buffer ( hsize , len ) ;
}
} else {
/* No staging buffer: write header and text directly into psinfo->buf. */
dst = psinfo - > buf ;
hsize = sprintf ( dst , " %s#%d Part%d \n " , why , oopscount ,
part ) ;
size = psinfo - > bufsize - hsize ;
dst + = hsize ;
if ( ! kmsg_dump_get_buffer ( dumper , true , dst ,
size , & len ) )
break ;
compressed = false ;
total_len = hsize + len ;
}
2010-12-28 14:25:21 -08:00
2011-11-17 13:13:29 -08:00
ret = psinfo - > write ( PSTORE_TYPE_DMESG , reason , & id , part ,
2013-08-16 13:53:10 -07:00
oopscount , compressed , total_len , psinfo ) ;
2011-10-12 09:17:24 -07:00
/* Flag a new entry only for a successful oops write while pstore is mounted. */
if ( ret = = 0 & & reason = = KMSG_DUMP_OOPS & & pstore_is_mounted ( ) )
2011-08-11 15:14:39 -07:00
pstore_new_entry = 1 ;
2012-06-15 14:07:51 +02:00
2013-08-16 13:53:10 -07:00
total + = total_len ;
2011-07-21 16:57:53 -04:00
part + + ;
2010-12-28 14:25:21 -08:00
}
pstore: Avoid deadlock in panic and emergency-restart path
[Issue]
When pstore is in panic and emergency-restart paths, it may be blocked
in those paths because it simply takes spin_lock.
This is an example scenario which pstore may hang up in a panic path:
- cpuA grabs psinfo->buf_lock
- cpuB panics and calls smp_send_stop
- smp_send_stop sends IRQ to cpuA
- after 1 second, cpuB gives up on cpuA and sends an NMI instead
- cpuA is now in an NMI handler while still holding buf_lock
- cpuB is deadlocked
This case may happen if a firmware has a bug and
cpuA is stuck talking with it more than one second.
Also, this is a similar scenario in an emergency-restart path:
- cpuA grabs psinfo->buf_lock and stucks in a firmware
- cpuB kicks emergency-restart via either sysrq-b or hangcheck timer.
And then, cpuB is deadlocked by taking psinfo->buf_lock again.
[Solution]
This patch avoids the deadlocking issues in both panic and emergency_restart
paths by introducing a function, is_non_blocking_path(), to check if a cpu
can be blocked in current path.
With this patch, pstore is not blocked even if another cpu has
taken a spin_lock, in those paths by changing from spin_lock_irqsave
to spin_trylock_irqsave.
In addition, according to a comment of emergency_restart() in kernel/sys.c,
spin_lock shouldn't be taken in an emergency_restart path to avoid
deadlock. This patch fits the comment below.
<snip>
/**
* emergency_restart - reboot the system
*
* Without shutting down any hardware or taking any locks
* reboot the system. This is called when we know we are in
* trouble so this is our best effort to reboot. This is
* safe to call in interrupt context.
*/
void emergency_restart(void)
<snip>
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2013-01-11 18:09:41 +00:00
/* Mirror the locking above: only unlock if the lock was actually taken. */
if ( pstore_cannot_block_path ( reason ) ) {
2011-08-12 10:54:51 -07:00
if ( is_locked )
pstore: Avoid deadlock in panic and emergency-restart path
[Issue]
When pstore is in panic and emergency-restart paths, it may be blocked
in those paths because it simply takes spin_lock.
This is an example scenario which pstore may hang up in a panic path:
- cpuA grabs psinfo->buf_lock
- cpuB panics and calls smp_send_stop
- smp_send_stop sends IRQ to cpuA
- after 1 second, cpuB gives up on cpuA and sends an NMI instead
- cpuA is now in an NMI handler while still holding buf_lock
- cpuB is deadlocked
This case may happen if a firmware has a bug and
cpuA is stuck talking with it more than one second.
Also, this is a similar scenario in an emergency-restart path:
- cpuA grabs psinfo->buf_lock and stucks in a firmware
- cpuB kicks emergency-restart via either sysrq-b or hangcheck timer.
And then, cpuB is deadlocked by taking psinfo->buf_lock again.
[Solution]
This patch avoids the deadlocking issues in both panic and emergency_restart
paths by introducing a function, is_non_blocking_path(), to check if a cpu
can be blocked in current path.
With this patch, pstore is not blocked even if another cpu has
taken a spin_lock, in those paths by changing from spin_lock_irqsave
to spin_trylock_irqsave.
In addition, according to a comment of emergency_restart() in kernel/sys.c,
spin_lock shouldn't be taken in an emergency_restart path to avoid
deadlock. This patch fits the comment below.
<snip>
/**
* emergency_restart - reboot the system
*
* Without shutting down any hardware or taking any locks
* reboot the system. This is called when we know we are in
* trouble so this is our best effort to reboot. This is
* safe to call in interrupt context.
*/
void emergency_restart(void)
<snip>
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2013-01-11 18:09:41 +00:00
spin_unlock_irqrestore ( & psinfo - > buf_lock , flags ) ;
2011-08-12 10:54:51 -07:00
} else
spin_unlock_irqrestore ( & psinfo - > buf_lock , flags ) ;
2010-12-28 14:25:21 -08:00
}
/* kmsg dumper handed to kmsg_dump_register() by pstore_register(). */
static struct kmsg_dumper pstore_dumper = {
. dump = pstore_dump ,
} ;
2012-05-26 06:20:19 -07:00
# ifdef CONFIG_PSTORE_CONSOLE
/*
 * Console write hook: forwards the c bytes at s to the backend as
 * PSTORE_TYPE_CONSOLE records, in chunks of at most psinfo->bufsize bytes.
 * Each chunk is copied into psinfo->buf under psinfo->buf_lock.
 * The con argument is not used.
 */
static void pstore_console_write ( struct console * con , const char * s , unsigned c )
{
const char * e = s + c ;
while ( s < e ) {
unsigned long flags ;
2012-11-14 11:49:53 +00:00
u64 id ;
2012-05-26 06:20:19 -07:00
/* Clamp this chunk to the backend buffer size. */
if ( c > psinfo - > bufsize )
c = psinfo - > bufsize ;
2012-09-18 01:43:44 +08:00
/*
 * During an oops only try the lock; if it is unavailable the
 * remaining text is dropped rather than waiting for the lock.
 */
if ( oops_in_progress ) {
if ( ! spin_trylock_irqsave ( & psinfo - > buf_lock , flags ) )
break ;
} else {
spin_lock_irqsave ( & psinfo - > buf_lock , flags ) ;
}
2012-05-26 06:20:19 -07:00
memcpy ( psinfo - > buf , s , c ) ;
2013-06-27 14:02:56 +05:30
psinfo - > write ( PSTORE_TYPE_CONSOLE , 0 , & id , 0 , 0 , 0 , c , psinfo ) ;
2012-05-26 06:20:19 -07:00
spin_unlock_irqrestore ( & psinfo - > buf_lock , flags ) ;
/* Advance past the chunk just written; c becomes the bytes still pending. */
s + = c ;
c = e - s ;
}
}
/* Console descriptor whose write hook is pstore_console_write(). */
static struct console pstore_console = {
. name = " pstore " ,
. write = pstore_console_write ,
. flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME ,
. index = - 1 ,
} ;
/* Register the pstore console; called from pstore_register(). */
static void pstore_register_console ( void )
{
register_console ( & pstore_console ) ;
}
# else
/* Console support compiled out: registration is a no-op. */
static void pstore_register_console ( void ) { }
# endif
2012-07-09 17:10:40 -07:00
/*
 * Default write() callback, installed by pstore_register() for backends
 * that do not provide their own write(). It forwards the request to the
 * backend's write_buf(), passing psinfo->buf as the data buffer.
 * The count argument is accepted but not forwarded.
 *
 * Returns whatever psi->write_buf() returns.
 */
static int pstore_write_compat ( enum pstore_type_id type ,
enum kmsg_dump_reason reason ,
efi_pstore: Add a sequence counter to a variable name
[Issue]
Currently, a variable name, which identifies each entry, consists of type, id and ctime.
But if multiple events happens in a short time, a second/third event may fail to log because
efi_pstore can't distinguish each event with current variable name.
[Solution]
A reasonable way to identify all events precisely is introducing a sequence counter to
the variable name.
The sequence counter has already supported in a pstore layer with "oopscount".
So, this patch adds it to a variable name.
Also, it is passed to read/erase callbacks of platform drivers in accordance with
the modification of the variable name.
<before applying this patch>
a variable name of first event: dump-type0-1-12345678
a variable name of second event: dump-type0-1-12345678
type:0
id:1
ctime:12345678
If multiple events happen in a short time, efi_pstore can't distinguish them because
variable names are same among them.
<after applying this patch>
it can be distinguishable by adding a sequence counter as follows.
a variable name of first event: dump-type0-1-1-12345678
a variable name of Second event: dump-type0-1-2-12345678
type:0
id:1
sequence counter: 1(first event), 2(second event)
ctime:12345678
In case of a write callback executed in pstore_console_write(), "0" is added to
an argument of the write callback because it just logs all kernel messages and
doesn't need to care about multiple events.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Mike Waychison <mikew@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-26 16:07:44 -08:00
u64 * id , unsigned int part , int count ,
2013-08-16 13:52:47 -07:00
bool compressed , size_t size ,
2013-06-27 14:02:56 +05:30
struct pstore_info * psi )
2012-07-09 17:10:40 -07:00
{
2013-08-16 13:52:47 -07:00
/* NOTE(review): the buffer comes from the global psinfo, not from psi - presumably the same object by the time this runs; confirm. */
return psi - > write_buf ( type , reason , id , part , psinfo - > buf , compressed ,
2013-06-27 14:02:56 +05:30
size , psi ) ;
2012-07-09 17:10:40 -07:00
}
2010-12-28 14:25:21 -08:00
/*
 * platform specific persistent storage driver registers with
 * us here. If pstore is already mounted, call the platform
 * read function right away to populate the file system. If not
 * then the pstore mount code will call us later to fill out
 * the file system.
 *
 * Register with kmsg_dump to save last part of console log on panic.
 *
 * Returns 0 on success, -EPERM if a backend name was requested and it
 * does not match psi->name, -EBUSY if a backend is already registered,
 * and -EINVAL if a reference on the owning module cannot be taken.
 */
int pstore_register ( struct pstore_info * psi )
{
struct module * owner = psi - > owner ;
2013-06-28 17:11:33 -04:00
/* A specific backend was requested: reject any other one. */
if ( backend & & strcmp ( backend , psi - > name ) )
return - EPERM ;
2010-12-28 14:25:21 -08:00
spin_lock ( & pstore_lock ) ;
/* Only one backend may be registered at a time. */
if ( psinfo ) {
spin_unlock ( & pstore_lock ) ;
return - EBUSY ;
}
2011-07-21 16:57:55 -04:00
2012-07-09 17:10:40 -07:00
/* Backends without their own write() get the write_buf()-based fallback. */
if ( ! psi - > write )
psi - > write = pstore_write_compat ;
2010-12-28 14:25:21 -08:00
psinfo = psi ;
2011-11-17 12:58:07 -08:00
mutex_init ( & psinfo - > read_mutex ) ;
2010-12-28 14:25:21 -08:00
spin_unlock ( & pstore_lock ) ;
/* Take a reference on the owning module; undo the registration if that fails. */
if ( owner & & ! try_module_get ( owner ) ) {
psinfo = NULL ;
return - EINVAL ;
}
2013-08-16 13:53:10 -07:00
allocate_buf_for_compression ( ) ;
2010-12-28 14:25:21 -08:00
if ( pstore_is_mounted ( ) )
2011-08-11 15:14:39 -07:00
pstore_get_records ( 0 ) ;
2010-12-28 14:25:21 -08:00
kmsg_dump_register ( & pstore_dumper ) ;
2013-12-18 15:17:10 -08:00
/* Console and ftrace frontends are only registered when PSTORE_FLAGS_FRAGILE is clear. */
if ( ( psi - > flags & PSTORE_FLAGS_FRAGILE ) = = 0 ) {
pstore_register_console ( ) ;
pstore_register_ftrace ( ) ;
}
2010-12-28 14:25:21 -08:00
2012-05-26 06:20:28 -07:00
/* Arm the update timer only when update_ms is non-negative (default -1). */
if ( pstore_update_ms > = 0 ) {
pstore_timer . expires = jiffies +
msecs_to_jiffies ( pstore_update_ms ) ;
add_timer ( & pstore_timer ) ;
}
2011-08-11 15:14:39 -07:00
2014-06-06 14:37:31 -07:00
pr_info ( " Registered %s as persistent store backend \n " , psi - > name ) ;
2013-06-28 17:11:33 -04:00
2010-12-28 14:25:21 -08:00
return 0 ;
}
EXPORT_SYMBOL_GPL ( pstore_register ) ;
/*
2011-08-11 15:14:39 -07:00
* Read all the records from the persistent store . Create
* files in our filesystem . Don ' t warn about - EEXIST errors
* when we are re - scanning the backing store looking to add new
* error records .
2010-12-28 14:25:21 -08:00
*/
2011-08-11 15:14:39 -07:00
void pstore_get_records ( int quiet )
2010-12-28 14:25:21 -08:00
{
struct pstore_info * psi = psinfo ;
2011-11-17 12:58:07 -08:00
char * buf = NULL ;
2011-05-16 10:58:57 -07:00
ssize_t size ;
2010-12-28 14:25:21 -08:00
u64 id ;
efi_pstore: Add a sequence counter to a variable name
[Issue]
Currently, a variable name, which identifies each entry, consists of type, id and ctime.
But if multiple events happens in a short time, a second/third event may fail to log because
efi_pstore can't distinguish each event with current variable name.
[Solution]
A reasonable way to identify all events precisely is introducing a sequence counter to
the variable name.
The sequence counter has already supported in a pstore layer with "oopscount".
So, this patch adds it to a variable name.
Also, it is passed to read/erase callbacks of platform drivers in accordance with
the modification of the variable name.
<before applying this patch>
a variable name of first event: dump-type0-1-12345678
a variable name of second event: dump-type0-1-12345678
type:0
id:1
ctime:12345678
If multiple events happen in a short time, efi_pstore can't distinguish them because
variable names are same among them.
<after applying this patch>
it can be distinguishable by adding a sequence counter as follows.
a variable name of first event: dump-type0-1-1-12345678
a variable name of Second event: dump-type0-1-2-12345678
type:0
id:1
sequence counter: 1(first event), 2(second event)
ctime:12345678
In case of a write callback executed in pstore_console_write(), "0" is added to
an argument of the write callback because it just logs all kernel messages and
doesn't need to care about multiple events.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Mike Waychison <mikew@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-26 16:07:44 -08:00
int count ;
2010-12-28 14:25:21 -08:00
enum pstore_type_id type ;
struct timespec time ;
2011-05-16 11:00:27 -07:00
int failed = 0 , rc ;
2013-08-16 13:53:19 -07:00
bool compressed ;
2013-08-16 13:53:28 -07:00
int unzipped_len = - 1 ;
2010-12-28 14:25:21 -08:00
if ( ! psi )
return ;
2011-11-17 12:58:07 -08:00
mutex_lock ( & psi - > read_mutex ) ;
2011-11-18 13:49:00 -08:00
if ( psi - > open & & psi - > open ( psi ) )
2011-05-16 11:00:27 -07:00
goto out ;
2013-08-16 13:53:19 -07:00
while ( ( size = psi - > read ( & id , & type , & count , & time , & buf , & compressed ,
psi ) ) > 0 ) {
2013-08-16 13:53:28 -07:00
if ( compressed & & ( type = = PSTORE_TYPE_DMESG ) ) {
if ( big_oops_buf )
unzipped_len = pstore_decompress ( buf ,
big_oops_buf , size ,
big_oops_buf_sz ) ;
if ( unzipped_len > 0 ) {
2014-03-12 21:34:06 +08:00
kfree ( buf ) ;
2013-08-16 13:53:28 -07:00
buf = big_oops_buf ;
size = unzipped_len ;
2013-08-16 13:53:39 -07:00
compressed = false ;
2013-08-16 13:53:28 -07:00
} else {
2014-06-06 14:37:31 -07:00
pr_err ( " decompression failed;returned %d \n " ,
unzipped_len ) ;
2013-08-16 13:53:39 -07:00
compressed = true ;
2013-08-16 13:53:28 -07:00
}
}
efi_pstore: Add a sequence counter to a variable name
[Issue]
Currently, a variable name, which identifies each entry, consists of type, id and ctime.
But if multiple events happens in a short time, a second/third event may fail to log because
efi_pstore can't distinguish each event with current variable name.
[Solution]
A reasonable way to identify all events precisely is introducing a sequence counter to
the variable name.
The sequence counter has already supported in a pstore layer with "oopscount".
So, this patch adds it to a variable name.
Also, it is passed to read/erase callbacks of platform drivers in accordance with
the modification of the variable name.
<before applying this patch>
a variable name of first event: dump-type0-1-12345678
a variable name of second event: dump-type0-1-12345678
type:0
id:1
ctime:12345678
If multiple events happen in a short time, efi_pstore can't distinguish them because
variable names are same among them.
<after applying this patch>
it can be distinguishable by adding a sequence counter as follows.
a variable name of first event: dump-type0-1-1-12345678
a variable name of Second event: dump-type0-1-2-12345678
type:0
id:1
sequence counter: 1(first event), 2(second event)
ctime:12345678
In case of a write callback executed in pstore_console_write(), "0" is added to
an argument of the write callback because it just logs all kernel messages and
doesn't need to care about multiple events.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Mike Waychison <mikew@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-26 16:07:44 -08:00
rc = pstore_mkfile ( type , psi - > name , id , count , buf ,
2013-08-16 13:53:39 -07:00
compressed , ( size_t ) size , time , psi ) ;
2013-08-16 13:53:28 -07:00
if ( unzipped_len < 0 ) {
/* Free buffer other than big oops */
kfree ( buf ) ;
buf = NULL ;
} else
unzipped_len = - 1 ;
2011-08-11 15:14:39 -07:00
if ( rc & & ( rc ! = - EEXIST | | ! quiet ) )
2010-12-28 14:25:21 -08:00
failed + + ;
}
2011-11-18 13:49:00 -08:00
if ( psi - > close )
psi - > close ( psi ) ;
2011-05-16 11:00:27 -07:00
out :
2011-11-17 12:58:07 -08:00
mutex_unlock ( & psi - > read_mutex ) ;
2010-12-28 14:25:21 -08:00
if ( failed )
2014-06-06 14:37:31 -07:00
pr_warn ( " failed to load %d record(s) from '%s' \n " ,
failed , psi - > name ) ;
2010-12-28 14:25:21 -08:00
}
2011-08-11 15:14:39 -07:00
/*
 * Work item handler: re-scan the backing store in quiet mode.  The
 * work_struct argument itself is not used.
 */
static void pstore_dowork(struct work_struct *work)
{
	pstore_get_records(1);
}
static void pstore_timefunc ( unsigned long dummy )
{
if ( pstore_new_entry ) {
pstore_new_entry = 0 ;
schedule_work ( & pstore_work ) ;
}
2012-05-26 06:20:28 -07:00
mod_timer ( & pstore_timer , jiffies + msecs_to_jiffies ( pstore_update_ms ) ) ;
2011-08-11 15:14:39 -07:00
}
2011-07-21 16:57:55 -04:00
/*
 * Optional name of the backend that is allowed to register.  When it is
 * set, pstore_register() returns -EPERM for any backend whose name does
 * not match.  Exposed as a string parameter with permission bits 0444.
 */
module_param ( backend , charp , 0444 ) ;
MODULE_PARM_DESC ( backend , " Pstore backend to use " ) ;