irq_work, smp: Allow irq_work on call_single_queue
Currently irq_work_queue_on() will issue an unconditional arch_send_call_function_single_ipi() and has the handler do irq_work_run(). This is unfortunate in that it makes the IPI handler look at a second cacheline and it misses the opportunity to avoid the IPI.

Instead, note that struct irq_work and struct __call_single_data are very similar in layout, so use a few bits in the flags word to encode a type and stick the irq_work on the call_single_queue list.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org
parent b2a02fc43a
commit 4b44a21dd6
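The trick the message describes is easiest to see outside the kernel. Below is a minimal, self-contained user-space C sketch of the idea: both entry types share their leading layout (list node, flags word, function pointer), a type is encoded in spare high bits of the flags word, and a single dispatcher tells the entries apart, the way flush_smp_call_function_queue() does after this change. The simplified llist_node, struct definitions, dispatch() and main() are illustrative stand-ins, not the kernel's code.

/*
 * Stand-alone sketch of the shared-layout + type-bits idea (user space).
 * Build with: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct llist_node { struct llist_node *next; };

/* Type bits live in the high nibble of the flags word, as in the patch. */
enum {
	CSD_FLAG_LOCK      = 0x01,
	CSD_TYPE_ASYNC     = 0x00,
	CSD_TYPE_SYNC      = 0x10,
	CSD_TYPE_IRQ_WORK  = 0x20,
	CSD_FLAG_TYPE_MASK = 0xF0,
};

/* Mirrors struct __call_single_data after the patch. */
struct call_single_data {
	struct llist_node llist;
	unsigned int flags;
	void (*func)(void *info);
	void *info;
};

/* Mirrors struct irq_work after the patch: same leading layout. */
struct irq_work {
	struct llist_node llnode;
	atomic_uint flags;
	void (*func)(struct irq_work *);
};

/* The layout contract the patch enforces with BUILD_BUG_ON() in smp_init(). */
_Static_assert(offsetof(struct irq_work, llnode) ==
	       offsetof(struct call_single_data, llist), "list offset");
_Static_assert(offsetof(struct irq_work, flags) ==
	       offsetof(struct call_single_data, flags), "flags offset");
_Static_assert(offsetof(struct irq_work, func) ==
	       offsetof(struct call_single_data, func), "func offset");

/* Simplified model of the dispatch in flush_smp_call_function_queue(). */
static void dispatch(struct llist_node *node)
{
	struct call_single_data *csd = (struct call_single_data *)node;

	switch (csd->flags & CSD_FLAG_TYPE_MASK) {
	case CSD_TYPE_ASYNC:
	case CSD_TYPE_SYNC:
		csd->func(csd->info);
		break;
	case CSD_TYPE_IRQ_WORK: {
		/* Same node, reinterpreted as an irq_work entry. */
		struct irq_work *work = (struct irq_work *)node;
		work->func(work);
		break;
	}
	}
}

static void csd_func(void *info)
{
	printf("csd callback: %s\n", (char *)info);
}

static void work_func(struct irq_work *w)
{
	printf("irq_work callback, flags=%#x\n", (unsigned int)w->flags);
}

int main(void)
{
	struct call_single_data csd = {
		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
		.func  = csd_func,
		.info  = "hello",
	};
	struct irq_work work = { .func = work_func };

	atomic_fetch_or(&work.flags, CSD_TYPE_IRQ_WORK);

	dispatch(&csd.llist);	/* runs csd_func("hello") */
	dispatch(&work.llnode);	/* runs work_func(&work)  */
	return 0;
}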
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -13,6 +13,8 @@
  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
  */
 
+/* flags share CSD_FLAG_ space */
+
 #define IRQ_WORK_PENDING	BIT(0)
 #define IRQ_WORK_BUSY		BIT(1)
 
@@ -23,9 +25,12 @@
 
 #define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
 
+/*
+ * structure shares layout with single_call_data_t.
+ */
 struct irq_work {
-	atomic_t flags;
 	struct llist_node llnode;
+	atomic_t flags;
 	void (*func)(struct irq_work *);
 };
 
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -16,17 +16,38 @@
 typedef void (*smp_call_func_t)(void *info);
 typedef bool (*smp_cond_func_t)(int cpu, void *info);
 
+enum {
+	CSD_FLAG_LOCK		= 0x01,
+
+	/* IRQ_WORK_flags */
+
+	CSD_TYPE_ASYNC		= 0x00,
+	CSD_TYPE_SYNC		= 0x10,
+	CSD_TYPE_IRQ_WORK	= 0x20,
+	CSD_FLAG_TYPE_MASK	= 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
 struct __call_single_data {
 	struct llist_node llist;
+	unsigned int flags;
 	smp_call_func_t func;
 	void *info;
-	unsigned int flags;
 };
 
 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */
 typedef struct __call_single_data call_single_data_t
 	__aligned(sizeof(struct __call_single_data));
 
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
 /* total number of cpus in this system (may exceed NR_CPUS) */
 extern unsigned int total_cpus;
 
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
 {
 	int oflags;
 
-	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
 	/*
 	 * If the work is already pending, no need to raise the IPI.
 	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
-			arch_send_call_function_single_ipi(cpu);
+		__smp_call_single_queue(cpu, &work->llnode);
 	} else {
 		__irq_work_queue_local(work);
 	}
@@ -131,6 +130,31 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
+void irq_work_single(void *arg)
+{
+	struct irq_work *work = arg;
+	int flags;
+
+	/*
+	 * Clear the PENDING bit, after this point the @work
+	 * can be re-used.
+	 * Make it immediately visible so that other CPUs trying
+	 * to claim that work don't rely on us to handle their data
+	 * while we are in the middle of the func.
+	 */
+	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+
+	lockdep_irq_work_enter(work);
+	work->func(work);
+	lockdep_irq_work_exit(work);
+	/*
+	 * Clear the BUSY bit and return to the free state if
+	 * no-one else claimed it meanwhile.
+	 */
+	flags &= ~IRQ_WORK_PENDING;
+	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+}
+
 static void irq_work_run_list(struct llist_head *list)
 {
 	struct irq_work *work, *tmp;
@@ -142,27 +166,8 @@ static void irq_work_run_list(struct llist_head *list)
 		return;
 
 	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode) {
-		int flags;
-		/*
-		 * Clear the PENDING bit, after this point the @work
-		 * can be re-used.
-		 * Make it immediately visible so that other CPUs trying
-		 * to claim that work don't rely on us to handle their data
-		 * while we are in the middle of the func.
-		 */
-		flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
-
-		lockdep_irq_work_enter(work);
-		work->func(work);
-		lockdep_irq_work_exit(work);
-		/*
-		 * Clear the BUSY bit and return to the free state if
-		 * no-one else claimed it meanwhile.
-		 */
-		flags &= ~IRQ_WORK_PENDING;
-		(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
-	}
+	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+		irq_work_single(work);
 }
 
 /*
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -23,10 +23,8 @@
 
 #include "smpboot.h"
 
-enum {
-	CSD_FLAG_LOCK		= 0x01,
-	CSD_FLAG_SYNCHRONOUS	= 0x02,
-};
+
+#define CSD_TYPE(_csd)	((_csd)->flags & CSD_FLAG_TYPE_MASK)
 
 struct call_function_data {
 	call_single_data_t	__percpu *csd;
@@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 extern void send_call_function_single_ipi(int cpu);
 
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+	/*
+	 * The list addition should be visible before sending the IPI
+	 * handler locks the list to pull the entry off it because of
+	 * normal cache coherency rules implied by spinlocks.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
+	 */
+	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+		send_call_function_single_ipi(cpu);
+}
+
 /*
  * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static int generic_exec_single(int cpu, call_single_data_t *csd,
-			       smp_call_func_t func, void *info)
+static int generic_exec_single(int cpu, call_single_data_t *csd)
 {
 	if (cpu == smp_processor_id()) {
+		smp_call_func_t func = csd->func;
+		void *info = csd->info;
 		unsigned long flags;
 
 		/*
@@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 		return 0;
 	}
 
-
 	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		csd_unlock(csd);
 		return -ENXIO;
 	}
 
-	csd->func = func;
-	csd->info = info;
-
-	/*
-	 * The list addition should be visible before sending the IPI
-	 * handler locks the list to pull the entry off it because of
-	 * normal cache coherency rules implied by spinlocks.
-	 *
-	 * If IPIs can go out of order to the cache coherency protocol
-	 * in an architecture, sufficient synchronisation should be added
-	 * to arch code to make it appear to obey cache coherency WRT
-	 * locking and barrier primitives. Generic code isn't really
-	 * equipped to do the right thing...
-	 */
-	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-		send_call_function_single_ipi(cpu);
+	__smp_call_single_queue(cpu, &csd->llist);
 
 	return 0;
 }
@@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 void generic_smp_call_function_single_interrupt(void)
 {
 	flush_smp_call_function_queue(true);
-
-	/*
-	 * Handle irq works queued remotely by irq_work_queue_on().
-	 * Smp functions above are typically synchronous so they
-	 * better run first since some other CPUs may be busy waiting
-	 * for them.
-	 */
-	irq_work_run();
 }
 
+extern void irq_work_single(void *);
+
 /**
  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
  *
@@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 		 * We don't have to use the _safe() variant here
 		 * because we are not invoking the IPI handlers yet.
 		 */
-		llist_for_each_entry(csd, entry, llist)
-			pr_warn("IPI callback %pS sent to offline CPU\n",
-				csd->func);
+		llist_for_each_entry(csd, entry, llist) {
+			switch (CSD_TYPE(csd)) {
+			case CSD_TYPE_ASYNC:
+			case CSD_TYPE_SYNC:
+			case CSD_TYPE_IRQ_WORK:
+				pr_warn("IPI callback %pS sent to offline CPU\n",
+					csd->func);
+				break;
+
+			default:
+				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
+					CSD_TYPE(csd));
+				break;
+			}
+		}
 	}
 
 	/*
@@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 */
 	prev = NULL;
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
-
 		/* Do we wait until *after* callback? */
-		if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
 			if (prev) {
 				prev->next = &csd_next->llist;
 			} else {
 				entry = &csd_next->llist;
 			}
+
 			func(info);
 			csd_unlock(csd);
 		} else {
@@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 * Second; run all !SYNC callbacks.
 	 */
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
+		int type = CSD_TYPE(csd);
 
-		csd_unlock(csd);
-		func(info);
+		if (type == CSD_TYPE_ASYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
+			csd_unlock(csd);
+			func(info);
+		} else if (type == CSD_TYPE_IRQ_WORK) {
+			irq_work_single(csd);
+		}
 	}
 }
 
@@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 {
 	call_single_data_t *csd;
 	call_single_data_t csd_stack = {
-		.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
 	};
 	int this_cpu;
 	int err;
@@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 		csd_lock(csd);
 	}
 
-	err = generic_exec_single(cpu, csd, func, info);
+	csd->func = func;
+	csd->info = info;
+
+	err = generic_exec_single(cpu, csd);
 
 	if (wait)
 		csd_lock_wait(csd);
@@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 	csd->flags = CSD_FLAG_LOCK;
 	smp_wmb();
 
-	err = generic_exec_single(cpu, csd, csd->func, csd->info);
+	err = generic_exec_single(cpu, csd);
 
 out:
 	preempt_enable();
@@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 		csd_lock(csd);
 		if (wait)
-			csd->flags |= CSD_FLAG_SYNCHRONOUS;
+			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -632,6 +648,17 @@ void __init smp_init(void)
 {
 	int num_nodes, num_cpus;
 
+	/*
+	 * Ensure struct irq_work layout matches so that
+	 * flush_smp_call_function_queue() can do horrible things.
+	 */
+	BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
+		     offsetof(struct __call_single_data, llist));
+	BUILD_BUG_ON(offsetof(struct irq_work, func) !=
+		     offsetof(struct __call_single_data, func));
+	BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
+		     offsetof(struct __call_single_data, flags));
+
 	idle_threads_init();
 	cpuhp_threads_init();
 
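For reference, the caller-visible irq_work API is unchanged by this commit; a user still initialises a work item and queues it on a remote CPU the same way, only the queueing path underneath now goes through the target CPU's call_single_queue. A rough illustrative snippet follows; it is not part of this patch, and my_remote_work, my_work and kick_cpu are made-up names.

#include <linux/irq_work.h>
#include <linux/printk.h>
#include <linux/smp.h>

/* Hypothetical callback: runs on the target CPU from the IPI path. */
static void my_remote_work(struct irq_work *work)
{
	pr_info("irq_work ran on CPU%d\n", smp_processor_id());
}

static struct irq_work my_work;

static void kick_cpu(int cpu)
{
	init_irq_work(&my_work, my_remote_work);

	/*
	 * After this patch the work item is pushed onto the target CPU's
	 * call_single_queue (tagged CSD_TYPE_IRQ_WORK in its flags word),
	 * and the IPI is skipped when that queue already has entries pending.
	 */
	irq_work_queue_on(&my_work, cpu);
}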