d8550860d9
When the scheduler sets TIF_NEED_RESCHED & we call into the scheduler from arch/mips/kernel/entry.S we disable interrupts. This is true regardless of whether we reach work_resched from syscall_exit_work, resume_userspace or by looping after calling schedule(). Although we disable interrupts in these paths we don't call trace_hardirqs_off() before calling into C code which may acquire locks, and we therefore leave lockdep with an inconsistent view of whether interrupts are disabled or not when CONFIG_PROVE_LOCKING & CONFIG_DEBUG_LOCKDEP are both enabled. Without tracing this interrupt state lockdep will print warnings such as the following once a task returns from a syscall via syscall_exit_partial with TIF_NEED_RESCHED set: [ 49.927678] ------------[ cut here ]------------ [ 49.934445] WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3687 check_flags.part.41+0x1dc/0x1e8 [ 49.946031] DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) [ 49.946355] CPU: 0 PID: 1 Comm: init Not tainted 4.10.0-00439-gc9fd5d362289-dirty #197 [ 49.963505] Stack : 0000000000000000 ffffffff81bb5d6a 0000000000000006 ffffffff801ce9c4 [ 49.974431] 0000000000000000 0000000000000000 0000000000000000 000000000000004a [ 49.985300] ffffffff80b7e487 ffffffff80a24498 a8000000ff160000 ffffffff80ede8b8 [ 49.996194] 0000000000000001 0000000000000000 0000000000000000 0000000077c8030c [ 50.007063] 000000007fd8a510 ffffffff801cd45c 0000000000000000 a8000000ff127c88 [ 50.017945] 0000000000000000 ffffffff801cf928 0000000000000001 ffffffff80a24498 [ 50.028827] 0000000000000000 0000000000000001 0000000000000000 0000000000000000 [ 50.039688] 0000000000000000 a8000000ff127bd0 0000000000000000 ffffffff805509bc [ 50.050575] 00000000140084e0 0000000000000000 0000000000000000 0000000000040a00 [ 50.061448] 0000000000000000 ffffffff8010e1b0 0000000000000000 ffffffff805509bc [ 50.072327] ... 
[ 50.076087] Call Trace: [ 50.079869] [<ffffffff8010e1b0>] show_stack+0x80/0xa8 [ 50.086577] [<ffffffff805509bc>] dump_stack+0x10c/0x190 [ 50.093498] [<ffffffff8015dde0>] __warn+0xf0/0x108 [ 50.099889] [<ffffffff8015de34>] warn_slowpath_fmt+0x3c/0x48 [ 50.107241] [<ffffffff801c15b4>] check_flags.part.41+0x1dc/0x1e8 [ 50.114961] [<ffffffff801c239c>] lock_is_held_type+0x8c/0xb0 [ 50.122291] [<ffffffff809461b8>] __schedule+0x8c0/0x10f8 [ 50.129221] [<ffffffff80946a60>] schedule+0x30/0x98 [ 50.135659] [<ffffffff80106278>] work_resched+0x8/0x34 [ 50.142397] ---[ end trace 0cb4f6ef5b99fe21 ]--- [ 50.148405] possible reason: unannotated irqs-off. [ 50.154600] irq event stamp: 400463 [ 50.159566] hardirqs last enabled at (400463): [<ffffffff8094edc8>] _raw_spin_unlock_irqrestore+0x40/0xa8 [ 50.171981] hardirqs last disabled at (400462): [<ffffffff8094eb98>] _raw_spin_lock_irqsave+0x30/0xb0 [ 50.183897] softirqs last enabled at (400450): [<ffffffff8016580c>] __do_softirq+0x4ac/0x6a8 [ 50.195015] softirqs last disabled at (400425): [<ffffffff80165e78>] irq_exit+0x110/0x128 Fix this by using the TRACE_IRQS_OFF macro to call trace_hardirqs_off() when CONFIG_TRACE_IRQFLAGS is enabled. This is done before invoking schedule() following the work_resched label because: 1) Interrupts are disabled regardless of the path we take to reach work_resched() & schedule(). 2) Performing the tracing here avoids the need to do it in paths which disable interrupts but don't call out to C code before hitting a path which uses the RESTORE_SOME macro that will call trace_hardirqs_on() or trace_hardirqs_off() as appropriate. We call trace_hardirqs_on() using the TRACE_IRQS_ON macro before calling syscall_trace_leave() for similar reasons, ensuring that lockdep has a consistent view of state after we re-enable interrupts. 
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: linux-mips@linux-mips.org
Cc: stable <stable@vger.kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/15385/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
180 lines
4.2 KiB
ArmAsm
180 lines
4.2 KiB
ArmAsm
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle
 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
 * Copyright (C) 2001 MIPS Technologies, Inc.
 */
#include <asm/asm.h>
|
|
#include <asm/asmmacro.h>
|
|
#include <asm/compiler.h>
|
|
#include <asm/irqflags.h>
|
|
#include <asm/regdef.h>
|
|
#include <asm/mipsregs.h>
|
|
#include <asm/stackframe.h>
|
|
#include <asm/isadep.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/war.h>
|
|
|
|
/*
 * Label aliases selected by the preemption model.
 *
 * With CONFIG_PREEMPT set, __ret_from_irq is another spelling of
 * ret_from_exception.  Without it, resume_kernel is another spelling of
 * restore_all.
 */
#ifdef CONFIG_PREEMPT
#define __ret_from_irq	ret_from_exception
#else
#define resume_kernel	restore_all
#endif
/* Exception, interrupt and syscall return paths. */
	.text
	.align	5

#ifndef CONFIG_PREEMPT
/*
 * Without CONFIG_PREEMPT this is a separate entry point: mask interrupts,
 * then continue at __ret_from_irq.
 */
FEXPORT(ret_from_exception)
	local_irq_disable			# preempt stop
	b	__ret_from_irq
#endif
/*
 * ret_from_irq stores s0 into the TI_REGS slot addressed through $28 and
 * then runs straight into __ret_from_irq.
 */
FEXPORT(ret_from_irq)
	LONG_S	s0, TI_REGS($28)
FEXPORT(__ret_from_irq)
/*
 * This point can also be reached after a syscall made from kernel space,
 * e.g. a failed kernel_execve(), so the saved status is checked before a
 * return to user mode is assumed.
 */
resume_userspace_check:
	LONG_L	t0, PT_STATUS(sp)		# saved status word
	andi	t0, t0, KU_USER			# zero: returning to kernel mode
	beqz	t0, resume_kernel
resume_userspace:
	/*
	 * Interrupts are masked before the flags are sampled so that an
	 * interrupt setting need_resched between the sample and the return
	 * cannot be missed.
	 */
	local_irq_disable
	LONG_L	a2, TI_FLAGS($28)		# a2 = current->work
	andi	t0, a2, _TIF_WORK_MASK		# syscall_trace is ignored here
	bnez	t0, work_pending
	j	restore_all
#ifdef CONFIG_PREEMPT
/*
 * Return to kernel mode with CONFIG_PREEMPT set.
 *
 * The frame is restored straight away when the word at TI_PRE_COUNT is
 * non-zero, when _TIF_NEED_RESCHED is clear, or when bit 0 of the saved
 * status word is clear.  Otherwise preempt_schedule_irq is called and the
 * checks from need_resched onwards are repeated.
 */
resume_kernel:
	local_irq_disable
	lw	t0, TI_PRE_COUNT($28)
	bnez	t0, restore_all
need_resched:
	LONG_L	t0, TI_FLAGS($28)
	andi	t1, t0, _TIF_NEED_RESCHED
	beqz	t1, restore_all
	LONG_L	t0, PT_STATUS(sp)		# Interrupts off?
	andi	t0, 1
	beqz	t0, restore_all
	jal	preempt_schedule_irq
	b	need_resched
#endif
/*
 * Call schedule_tail, then call the routine whose address is held in s0
 * with s1 copied into a0, and finally continue at syscall_exit.
 */
FEXPORT(ret_from_kernel_thread)
	jal	schedule_tail			# a0 = struct task_struct *prev
	move	a0, s1
	jal	s0
	j	syscall_exit
/*
 * Call schedule_tail; execution then continues with the code that follows
 * this entry point (there is no branch after the call).
 */
FEXPORT(ret_from_fork)
	jal	schedule_tail			# a0 = struct task_struct *prev
FEXPORT(syscall_exit)
	/*
	 * Interrupts are masked so that need_resched and pending signals
	 * cannot change between sampling the flags and returning.
	 */
	local_irq_disable
	LONG_L	a2, TI_FLAGS($28)		# a2 = current->work
	li	t0, _TIF_ALLWORK_MASK
	and	t0, a2, t0
	bnez	t0, syscall_exit_work
	/* No bit of _TIF_ALLWORK_MASK is set: no branch is taken here. */
restore_all:					# restore full frame
	.set	noat
	RESTORE_TEMP
	RESTORE_AT
	RESTORE_STATIC
restore_partial:				# restore partial frame
#ifdef CONFIG_TRACE_IRQFLAGS
	/*
	 * Report the interrupt state held in the saved status word:
	 * trace_hardirqs_on is called when the ST0_IE bit (ST0_IEP on
	 * R3000 and TX39XX configurations) is set, trace_hardirqs_off
	 * when it is clear.  The STATIC, AT and TEMP register groups are
	 * saved before the call and restored after it.
	 */
	SAVE_STATIC
	SAVE_AT
	SAVE_TEMP
	LONG_L	v0, PT_STATUS(sp)
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
	and	v0, ST0_IEP
#else
	and	v0, ST0_IE
#endif
	beqz	v0, 1f
	jal	trace_hardirqs_on
	b	2f
1:	jal	trace_hardirqs_off
2:
	RESTORE_TEMP
	RESTORE_AT
	RESTORE_STATIC
#endif
	RESTORE_SOME
	RESTORE_SP_AND_RET
	.set	at
work_pending:
	andi	t0, a2, _TIF_NEED_RESCHED	# a2 is preloaded with TI_FLAGS
	beqz	t0, work_notifysig
work_resched:
	/*
	 * Interrupts are disabled on every path that reaches this label.
	 * Record that with the irq-flags tracing code before calling into
	 * C, so that lockdep has a consistent view of the interrupt state.
	 */
	TRACE_IRQS_OFF
	jal	schedule

	/*
	 * Mask interrupts again so that need_resched and pending signals
	 * cannot change between sampling the flags and returning.
	 */
	local_irq_disable
	LONG_L	a2, TI_FLAGS($28)
	andi	t0, a2, _TIF_WORK_MASK		# work besides syscall tracing?
	beqz	t0, restore_all
	andi	t0, a2, _TIF_NEED_RESCHED
	bnez	t0, work_resched
	/* Work remains but no reschedule is needed: no branch is taken. */
work_notifysig:
	/*
	 * Pending signals and notify-resume requests: call do_notify_resume
	 * with a0 = sp and a1 = 0, then redo the user/kernel mode check.
	 */
	move	a0, sp
	li	a1, 0
	jal	do_notify_resume		# a2 already loaded
	j	resume_userspace_check
FEXPORT(syscall_exit_partial)
	/*
	 * Interrupts are masked so that need_resched cannot change between
	 * sampling the flags and returning.
	 */
	local_irq_disable
	LONG_L	a2, TI_FLAGS($28)		# a2 = current->work
	li	t0, _TIF_ALLWORK_MASK
	and	t0, a2
	beqz	t0, restore_partial
	/* Some work bit is set: SAVE_STATIC runs before anything else. */
	SAVE_STATIC
syscall_exit_work:
	LONG_L	t0, PT_STATUS(sp)		# saved status word
	andi	t0, t0, KU_USER			# zero: returning to kernel mode
	beqz	t0, resume_kernel
	li	t0, _TIF_WORK_SYSCALL_EXIT
	and	t0, a2				# a2 is preloaded with TI_FLAGS
	beqz	t0, work_pending		# no trace bit set
	/*
	 * Interrupts are enabled again for syscall_trace_leave(); one
	 * could let syscall_trace_leave() call schedule() instead.
	 * TRACE_IRQS_ON keeps the irq-flags tracing state in step with
	 * the re-enabled interrupts.
	 */
	local_irq_enable
	TRACE_IRQS_ON
	move	a0, sp
	jal	syscall_trace_leave
	b	resume_userspace
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) || \
    defined(CONFIG_MIPS_MT)

/*
 * MIPS32R2 instruction hazard barrier.  It has to be reached by a call:
 * the routine consists of the jr.hb that returns through ra.
 *
 * C code should use the inline version named instruction_hazard().
 */
LEAF(mips_ihb)
	.set	MIPS_ISA_LEVEL_RAW
	jr.hb	ra
	nop
	END(mips_ihb)

#endif /* CONFIG_CPU_MIPSR2 or CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */