x86/vector: Simplify vector move cleanup
The vector move cleanup needs to walk the vector space and do a lot of sanity checks to find a vector to clean up. With single CPU affinities this can be simplified and made more robust by queueing the vector configuration which needs to be cleaned up in a hlist on the CPU which was the previous target. That removes all the race conditions because the cleanup either finds a valid list entry or not. The latter happens when the interrupt was torn down before the cleanup handler was able to run. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Juergen Gross <jgross@suse.com> Tested-by: Yu Chen <yu.c.chen@intel.com> Acked-by: Juergen Gross <jgross@suse.com> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Alok Kataria <akataria@vmware.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Rui Zhang <rui.zhang@intel.com> Cc: "K. Y. Srinivasan" <kys@microsoft.com> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Len Brown <lenb@kernel.org> Link: https://lkml.kernel.org/r/20170913213154.622727892@linutronix.de
This commit is contained in:
parent
029c6e1c9d
commit
dccfe3147b
@ -25,6 +25,7 @@ struct apic_chip_data {
|
||||
struct irq_cfg cfg;
|
||||
unsigned int cpu;
|
||||
unsigned int prev_cpu;
|
||||
struct hlist_node clist;
|
||||
cpumask_var_t domain;
|
||||
cpumask_var_t old_domain;
|
||||
u8 move_in_progress : 1;
|
||||
@ -38,6 +39,9 @@ static struct irq_chip lapic_controller;
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
|
||||
#endif
|
||||
#ifdef CONFIG_SMP
|
||||
static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
|
||||
#endif
|
||||
|
||||
void lock_vector_lock(void)
|
||||
{
|
||||
@ -87,6 +91,7 @@ static struct apic_chip_data *alloc_apic_chip_data(int node)
|
||||
goto out_data;
|
||||
if (!zalloc_cpumask_var_node(&apicd->old_domain, GFP_KERNEL, node))
|
||||
goto out_domain;
|
||||
INIT_HLIST_NODE(&apicd->clist);
|
||||
return apicd;
|
||||
out_domain:
|
||||
free_cpumask_var(apicd->domain);
|
||||
@ -127,8 +132,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
|
||||
* If there is still a move in progress or the previous move has not
|
||||
* been cleaned up completely, tell the caller to come back later.
|
||||
*/
|
||||
if (d->move_in_progress ||
|
||||
cpumask_intersects(d->old_domain, cpu_online_mask))
|
||||
if (d->cfg.old_vector)
|
||||
return -EBUSY;
|
||||
|
||||
/* Only try and allocate irqs on cpus that are present */
|
||||
@ -263,38 +267,22 @@ static int assign_irq_vector_policy(int irq, int node,
|
||||
|
||||
static void clear_irq_vector(int irq, struct apic_chip_data *apicd)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int cpu, vector;
|
||||
unsigned int vector = apicd->cfg.vector;
|
||||
|
||||
if (!apicd->cfg.vector)
|
||||
if (!vector)
|
||||
return;
|
||||
|
||||
vector = apicd->cfg.vector;
|
||||
for_each_cpu_and(cpu, apicd->domain, cpu_online_mask)
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
|
||||
|
||||
per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
|
||||
apicd->cfg.vector = 0;
|
||||
cpumask_clear(apicd->domain);
|
||||
|
||||
/*
|
||||
* If move is in progress or the old_domain mask is not empty,
|
||||
* i.e. the cleanup IPI has not been processed yet, we need to remove
|
||||
* the old references to desc from all cpus vector tables.
|
||||
*/
|
||||
if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain))
|
||||
/* Clean up move in progress */
|
||||
vector = apicd->cfg.old_vector;
|
||||
if (!vector)
|
||||
return;
|
||||
|
||||
desc = irq_to_desc(irq);
|
||||
for_each_cpu_and(cpu, apicd->old_domain, cpu_online_mask) {
|
||||
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
|
||||
vector++) {
|
||||
if (per_cpu(vector_irq, cpu)[vector] != desc)
|
||||
continue;
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
|
||||
apicd->move_in_progress = 0;
|
||||
hlist_del_init(&apicd->clist);
|
||||
}
|
||||
|
||||
void init_irq_alloc_info(struct irq_alloc_info *info,
|
||||
@ -474,7 +462,7 @@ static void vector_update_shutdown_irqs(void)
|
||||
struct irq_data *irqd = irq_desc_get_irq_data(desc);
|
||||
struct apic_chip_data *ad = apic_chip_data(irqd);
|
||||
|
||||
if (ad && cpumask_test_cpu(cpu, ad->domain) && ad->cfg.vector)
|
||||
if (ad && ad->cfg.vector && ad->cpu == smp_processor_id())
|
||||
this_cpu_write(vector_irq[ad->cfg.vector], desc);
|
||||
}
|
||||
}
|
||||
@ -524,11 +512,9 @@ static int apic_retrigger_irq(struct irq_data *irqd)
|
||||
{
|
||||
struct apic_chip_data *apicd = apic_chip_data(irqd);
|
||||
unsigned long flags;
|
||||
int cpu;
|
||||
|
||||
raw_spin_lock_irqsave(&vector_lock, flags);
|
||||
cpu = cpumask_first_and(apicd->domain, cpu_online_mask);
|
||||
apic->send_IPI_mask(cpumask_of(cpu), apicd->cfg.vector);
|
||||
apic->send_IPI(apicd->cpu, apicd->cfg.vector);
|
||||
raw_spin_unlock_irqrestore(&vector_lock, flags);
|
||||
|
||||
return 1;
|
||||
@ -565,13 +551,56 @@ static struct irq_chip lapic_controller = {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/*
 * Handler for IRQ_MOVE_CLEANUP_VECTOR: releases the old vectors of
 * interrupts which were moved away from this CPU.
 *
 * Walks the cleanup_list of the current CPU with vector_lock held. For
 * every queued apic_chip_data the old vector (cfg.old_vector) is checked
 * against the local APIC IRR:
 *  - IRR bit set:   the entry stays queued and IRQ_MOVE_CLEANUP_VECTOR is
 *                   sent to this CPU again, so the cleanup is retried.
 *  - IRR bit clear: the entry is unlinked, this CPU's vector_irq slot for
 *                   the old vector is set to VECTOR_UNUSED and
 *                   cfg.old_vector is cleared.
 *
 * An empty list is handled silently; the loop simply does not execute.
 */
asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
|
||||
{
|
||||
struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
|
||||
struct apic_chip_data *apicd;
|
||||
struct hlist_node *tmp;
|
||||
|
||||
entering_ack_irq();
|
||||
/* Prevent vectors vanishing under us */
|
||||
raw_spin_lock(&vector_lock);
|
||||
|
||||
/* _safe iterator: entries are unlinked from the list inside the loop */
hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
|
||||
unsigned int irr, vector = apicd->cfg.old_vector;
|
||||
|
||||
/*
|
||||
* Paranoia: Check if the vector that needs to be cleaned
|
||||
* up is registered at the APIC's IRR. If so, then this is
|
||||
* not the best time to clean it up. Clean it up in the
|
||||
* next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
|
||||
* to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
|
||||
* priority external vector, so on return from this
|
||||
* interrupt the device interrupt will happen first.
|
||||
*/
|
||||
/* Read the IRR word for this vector: 32 vectors per word, words 0x10 apart */
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
|
||||
if (irr & (1U << (vector % 32))) {
|
||||
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
|
||||
/* Leave the entry queued for the retry triggered above */
continue;
|
||||
}
|
||||
/* Nothing pending on the old vector: release it on this CPU */
hlist_del_init(&apicd->clist);
|
||||
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
|
||||
apicd->cfg.old_vector = 0;
|
||||
}
|
||||
|
||||
raw_spin_unlock(&vector_lock);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
static void __send_cleanup_vector(struct apic_chip_data *apicd)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
raw_spin_lock(&vector_lock);
|
||||
cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask);
|
||||
apicd->move_in_progress = 0;
|
||||
if (!cpumask_empty(apicd->old_domain))
|
||||
apic->send_IPI_mask(apicd->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
|
||||
cpu = apicd->prev_cpu;
|
||||
if (cpu_online(cpu)) {
|
||||
hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
|
||||
apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
|
||||
} else {
|
||||
apicd->cfg.old_vector = 0;
|
||||
}
|
||||
raw_spin_unlock(&vector_lock);
|
||||
}
|
||||
|
||||
@ -584,95 +613,15 @@ void send_cleanup_vector(struct irq_cfg *cfg)
|
||||
__send_cleanup_vector(apicd);
|
||||
}
|
||||
|
||||
asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
|
||||
{
|
||||
unsigned vector, me;
|
||||
|
||||
entering_ack_irq();
|
||||
|
||||
/* Prevent vectors vanishing under us */
|
||||
raw_spin_lock(&vector_lock);
|
||||
|
||||
me = smp_processor_id();
|
||||
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
||||
struct apic_chip_data *apicd;
|
||||
struct irq_desc *desc;
|
||||
unsigned int irr;
|
||||
|
||||
retry:
|
||||
desc = __this_cpu_read(vector_irq[vector]);
|
||||
if (IS_ERR_OR_NULL(desc))
|
||||
continue;
|
||||
|
||||
if (!raw_spin_trylock(&desc->lock)) {
|
||||
raw_spin_unlock(&vector_lock);
|
||||
cpu_relax();
|
||||
raw_spin_lock(&vector_lock);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
apicd = apic_chip_data(irq_desc_get_irq_data(desc));
|
||||
if (!apicd)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* Nothing to cleanup if irq migration is in progress
|
||||
* or this cpu is not set in the cleanup mask.
|
||||
*/
|
||||
if (apicd->move_in_progress ||
|
||||
!cpumask_test_cpu(me, apicd->old_domain))
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* We have two cases to handle here:
|
||||
* 1) vector is unchanged but the target mask got reduced
|
||||
* 2) vector and the target mask has changed
|
||||
*
|
||||
* #1 is obvious, but in #2 we have two vectors with the same
|
||||
* irq descriptor: the old and the new vector. So we need to
|
||||
* make sure that we only cleanup the old vector. The new
|
||||
* vector has the current @vector number in the config and
|
||||
* this cpu is part of the target mask. We better leave that
|
||||
* one alone.
|
||||
*/
|
||||
if (vector == apicd->cfg.vector &&
|
||||
cpumask_test_cpu(me, apicd->domain))
|
||||
goto unlock;
|
||||
|
||||
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
|
||||
/*
|
||||
* Check if the vector that needs to be cleanedup is
|
||||
* registered at the cpu's IRR. If so, then this is not
|
||||
* the best time to clean it up. Lets clean it up in the
|
||||
* next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
|
||||
* to myself.
|
||||
*/
|
||||
if (irr & (1 << (vector % 32))) {
|
||||
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
|
||||
goto unlock;
|
||||
}
|
||||
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
|
||||
cpumask_clear_cpu(me, apicd->old_domain);
|
||||
unlock:
|
||||
raw_spin_unlock(&desc->lock);
|
||||
}
|
||||
|
||||
raw_spin_unlock(&vector_lock);
|
||||
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
|
||||
{
|
||||
unsigned me;
|
||||
struct apic_chip_data *apicd;
|
||||
|
||||
apicd = container_of(cfg, struct apic_chip_data, cfg);
|
||||
if (likely(!apicd->move_in_progress))
|
||||
return;
|
||||
|
||||
me = smp_processor_id();
|
||||
if (vector == apicd->cfg.vector && cpumask_test_cpu(me, apicd->domain))
|
||||
if (vector == apicd->cfg.vector && apicd->cpu == smp_processor_id())
|
||||
__send_cleanup_vector(apicd);
|
||||
}
|
||||
|
||||
@ -686,10 +635,9 @@ void irq_complete_move(struct irq_cfg *cfg)
|
||||
*/
|
||||
void irq_force_complete_move(struct irq_desc *desc)
|
||||
{
|
||||
struct irq_data *irqd;
|
||||
struct apic_chip_data *apicd;
|
||||
struct irq_cfg *cfg;
|
||||
unsigned int cpu;
|
||||
struct irq_data *irqd;
|
||||
unsigned int vector;
|
||||
|
||||
/*
|
||||
* The function is called for all descriptors regardless of which
|
||||
@ -701,42 +649,30 @@ void irq_force_complete_move(struct irq_desc *desc)
|
||||
* (apic_chip_data) before touching it any further.
|
||||
*/
|
||||
irqd = irq_domain_get_irq_data(x86_vector_domain,
|
||||
irq_desc_get_irq(desc));
|
||||
irq_desc_get_irq(desc));
|
||||
if (!irqd)
|
||||
return;
|
||||
|
||||
raw_spin_lock(&vector_lock);
|
||||
apicd = apic_chip_data(irqd);
|
||||
cfg = apicd ? &apicd->cfg : NULL;
|
||||
|
||||
if (!cfg)
|
||||
return;
|
||||
if (!apicd)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* This is tricky. If the cleanup of @data->old_domain has not been
|
||||
* If old_vector is empty, no action required.
|
||||
*/
|
||||
vector = apicd->cfg.old_vector;
|
||||
if (!vector)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* This is tricky. If the cleanup of the old vector has not been
|
||||
* done yet, then the following setaffinity call will fail with
|
||||
* -EBUSY. This can leave the interrupt in a stale state.
|
||||
*
|
||||
* All CPUs are stuck in stop machine with interrupts disabled so
|
||||
* calling __irq_complete_move() would be completely pointless.
|
||||
*/
|
||||
raw_spin_lock(&vector_lock);
|
||||
/*
|
||||
* Clean out all offline cpus (including the outgoing one) from the
|
||||
* old_domain mask.
|
||||
*/
|
||||
cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask);
|
||||
|
||||
/*
|
||||
* If move_in_progress is cleared and the old_domain mask is empty,
|
||||
* then there is nothing to cleanup. fixup_irqs() will take care of
|
||||
* the stale vectors on the outgoing cpu.
|
||||
*/
|
||||
if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain)) {
|
||||
raw_spin_unlock(&vector_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* 1) The interrupt is in move_in_progress state. That means that we
|
||||
* have not seen an interrupt since the io_apic was reprogrammed to
|
||||
* the new vector.
|
||||
@ -778,18 +714,15 @@ void irq_force_complete_move(struct irq_desc *desc)
|
||||
* area arises.
|
||||
*/
|
||||
pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
|
||||
irqd->irq, cfg->old_vector);
|
||||
irqd->irq, vector);
|
||||
}
|
||||
/*
|
||||
* If old_domain is not empty, then other cpus still have the irq
|
||||
* descriptor set in their vector array. Clean it up.
|
||||
*/
|
||||
for_each_cpu(cpu, apicd->old_domain)
|
||||
per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
|
||||
|
||||
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
|
||||
/* Cleanup the left overs of the (half finished) move */
|
||||
cpumask_clear(apicd->old_domain);
|
||||
apicd->cfg.old_vector = 0;
|
||||
apicd->move_in_progress = 0;
|
||||
hlist_del_init(&apicd->clist);
|
||||
unlock:
|
||||
raw_spin_unlock(&vector_lock);
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user