linux/kernel/irq/handle.c
Peter Zijlstra de8f5e4f2d lockdep: Introduce wait-type checks
Extend lockdep to validate lock wait-type context.

The current wait-types are:

	LD_WAIT_FREE,		/* wait free, rcu etc.. */
	LD_WAIT_SPIN,		/* spin loops, raw_spinlock_t etc.. */
	LD_WAIT_CONFIG,		/* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
	LD_WAIT_SLEEP,		/* sleeping locks, mutex_t etc.. */

Where lockdep validates that the current lock (the one being acquired)
fits in the current wait-context (as generated by the held stack).

This ensures that there is no attempt to acquire mutexes while holding
spinlocks, to acquire spinlocks while holding raw_spinlocks and so on. In
other words, its a more fancy might_sleep().

Obviously RCU made the entire ordeal more complex than a simple single
value test because RCU can be acquired in (pretty much) any context and
while it presents a context to nested locks it is not the same as it
got acquired in.

Therefore its necessary to split the wait_type into two values, one
representing the acquire (outer) and one representing the nested context
(inner). For most 'normal' locks these two are the same.

[ To make static initialization easier we have the rule that:
  .outer == INV means .outer == .inner; because INV == 0. ]

It further means that its required to find the minimal .inner of the held
stack to compare against the outer of the new lock; because while 'normal'
RCU presents a CONFIG type to nested locks, if it is taken while already
holding a SPIN type it obviously doesn't relax the rules.

Below is an example output generated by the trivial test code:

  raw_spin_lock(&foo);
  spin_lock(&bar);
  spin_unlock(&bar);
  raw_spin_unlock(&foo);

 [ BUG: Invalid wait context ]
 -----------------------------
 swapper/0/1 is trying to lock:
 ffffc90000013f20 (&bar){....}-{3:3}, at: kernel_init+0xdb/0x187
 other info that might help us debug this:
 1 lock held by swapper/0/1:
  #0: ffffc90000013ee0 (&foo){+.+.}-{2:2}, at: kernel_init+0xd1/0x187

The way to read it is to look at the new -{n,m} part in the lock
description; -{3:3} for the attempted lock, and try and match that up to
the held locks, which in this case is the one: -{2,2}.

This tells that the acquiring lock requires a more relaxed environment than
presented by the lock stack.

Currently only the normal locks and RCU are converted, the rest of the
lockdep users defaults to .inner = INV which is ignored. More conversions
can be done when desired.

The check for spinlock_t nesting is not enabled by default. It's a separate
config option for now as there are known problems which are currently
addressed. The config option allows to identify these problems and to
verify that the solutions found are indeed solving them.

The config switch will be removed and the checks will permanently enabled
once the vast majority of issues has been addressed.

[ bigeasy: Move LD_WAIT_FREE,… out of CONFIG_LOCKDEP to avoid compile
	   failure with CONFIG_DEBUG_SPINLOCK + !CONFIG_LOCKDEP]
[ tglx: Add the config option ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200321113242.427089655@linutronix.de
2020-03-21 16:00:24 +01:00

230 lines
5.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
*
* This file contains the core interrupt handling code. Detailed
* information is available in Documentation/core-api/genericirq.rst
*
*/
#include <linux/irq.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <trace/events/irq.h>
#include "internals.h"
#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
#endif
/**
* handle_bad_irq - handle spurious and unhandled irqs
* @desc: description of the interrupt
*
* Handles spurious and unhandled IRQ's. It also prints a debugmessage.
*/
void handle_bad_irq(struct irq_desc *desc)
{
unsigned int irq = irq_desc_get_irq(desc);
print_irq_desc(irq, desc);
kstat_incr_irqs_this_cpu(desc);
ack_bad_irq(irq);
}
EXPORT_SYMBOL_GPL(handle_bad_irq);
/*
* Special, empty irq handler:
*/
irqreturn_t no_action(int cpl, void *dev_id)
{
return IRQ_NONE;
}
EXPORT_SYMBOL_GPL(no_action);
static void warn_no_thread(unsigned int irq, struct irqaction *action)
{
if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
return;
printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
"but no thread function available.", irq, action->name);
}
void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
{
/*
* In case the thread crashed and was killed we just pretend that
* we handled the interrupt. The hardirq handler has disabled the
* device interrupt, so no irq storm is lurking.
*/
if (action->thread->flags & PF_EXITING)
return;
/*
* Wake up the handler thread for this action. If the
* RUNTHREAD bit is already set, nothing to do.
*/
if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
return;
/*
* It's safe to OR the mask lockless here. We have only two
* places which write to threads_oneshot: This code and the
* irq thread.
*
* This code is the hard irq context and can never run on two
* cpus in parallel. If it ever does we have more serious
* problems than this bitmask.
*
* The irq threads of this irq which clear their "running" bit
* in threads_oneshot are serialized via desc->lock against
* each other and they are serialized against this code by
* IRQS_INPROGRESS.
*
* Hard irq handler:
*
* spin_lock(desc->lock);
* desc->state |= IRQS_INPROGRESS;
* spin_unlock(desc->lock);
* set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
* desc->threads_oneshot |= mask;
* spin_lock(desc->lock);
* desc->state &= ~IRQS_INPROGRESS;
* spin_unlock(desc->lock);
*
* irq thread:
*
* again:
* spin_lock(desc->lock);
* if (desc->state & IRQS_INPROGRESS) {
* spin_unlock(desc->lock);
* while(desc->state & IRQS_INPROGRESS)
* cpu_relax();
* goto again;
* }
* if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
* desc->threads_oneshot &= ~mask;
* spin_unlock(desc->lock);
*
* So either the thread waits for us to clear IRQS_INPROGRESS
* or we are waiting in the flow handler for desc->lock to be
* released before we reach this point. The thread also checks
* IRQTF_RUNTHREAD under desc->lock. If set it leaves
* threads_oneshot untouched and runs the thread another time.
*/
desc->threads_oneshot |= action->thread_mask;
/*
* We increment the threads_active counter in case we wake up
* the irq thread. The irq thread decrements the counter when
* it returns from the handler or in the exit path and wakes
* up waiters which are stuck in synchronize_irq() when the
* active count becomes zero. synchronize_irq() is serialized
* against this code (hard irq handler) via IRQS_INPROGRESS
* like the finalize_oneshot() code. See comment above.
*/
atomic_inc(&desc->threads_active);
wake_up_process(action->thread);
}
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
{
irqreturn_t retval = IRQ_NONE;
unsigned int irq = desc->irq_data.irq;
struct irqaction *action;
record_irq_time(desc);
for_each_action_of_desc(desc, action) {
irqreturn_t res;
/*
* If this IRQ would be threaded under force_irqthreads, mark it so.
*/
if (irq_settings_can_thread(desc) &&
!(action->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)))
trace_hardirq_threaded();
trace_irq_handler_entry(irq, action);
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);
if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
irq, action->handler))
local_irq_disable();
switch (res) {
case IRQ_WAKE_THREAD:
/*
* Catch drivers which return WAKE_THREAD but
* did not set up a thread function
*/
if (unlikely(!action->thread_fn)) {
warn_no_thread(irq, action);
break;
}
__irq_wake_thread(desc, action);
/* Fall through - to add to randomness */
case IRQ_HANDLED:
*flags |= action->flags;
break;
default:
break;
}
retval |= res;
}
return retval;
}
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval;
unsigned int flags = 0;
retval = __handle_irq_event_percpu(desc, &flags);
add_interrupt_randomness(desc->irq_data.irq, flags);
if (!noirqdebug)
note_interrupt(desc, retval);
return retval;
}
irqreturn_t handle_irq_event(struct irq_desc *desc)
{
irqreturn_t ret;
desc->istate &= ~IRQS_PENDING;
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock(&desc->lock);
ret = handle_irq_event_percpu(desc);
raw_spin_lock(&desc->lock);
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
return ret;
}
#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
if (handle_arch_irq)
return -EBUSY;
handle_arch_irq = handle_irq;
return 0;
}
#endif