commit c0af52437254fda8b0cdbaae5a9b6d9327f1fcd5 upstream. Commit 34c3d9819fda ("genirq/affinity: Provide smarter irq spreading infrastructure") introduced a better IRQ spreading mechanism, taking into account the available NUMA nodes in the machine. The problem is that the algorithm for retrieving the nodemask iterates "linearly" based on the number of online nodes - some architectures, like PowerPC, present a non-linear node distribution in the nodemask. If this is the case, the algorithm leads to a wrong node count and therefore to a bad/incomplete IRQ affinity distribution. For example, this problem was found on a machine with 128 CPUs and two nodes, namely nodes 0 and 8 (instead of 0 and 1, if they were linearly distributed). This led to a wrong affinity distribution, which then led to a bad mq allocation for the nvme driver. Finally, we take the opportunity to fix a comment regarding the affinity distribution when we have _more_ nodes than vectors. Fixes: 34c3d9819fda ("genirq/affinity: Provide smarter irq spreading infrastructure") Reported-by: Gabriel Krisman Bertazi <gabriel@krisman.be> Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Gabriel Krisman Bertazi <gabriel@krisman.be> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Cc: linux-pci@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: hch@lst.de Link: http://lkml.kernel.org/r/1481738472-2671-1-git-send-email-gpiccoli@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
155 lines
3.7 KiB
C
155 lines
3.7 KiB
C
|
|
#include <linux/interrupt.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/cpu.h>
|
|
|
|
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
|
|
int cpus_per_vec)
|
|
{
|
|
const struct cpumask *siblmsk;
|
|
int cpu, sibl;
|
|
|
|
for ( ; cpus_per_vec > 0; ) {
|
|
cpu = cpumask_first(nmsk);
|
|
|
|
/* Should not happen, but I'm too lazy to think about it */
|
|
if (cpu >= nr_cpu_ids)
|
|
return;
|
|
|
|
cpumask_clear_cpu(cpu, nmsk);
|
|
cpumask_set_cpu(cpu, irqmsk);
|
|
cpus_per_vec--;
|
|
|
|
/* If the cpu has siblings, use them first */
|
|
siblmsk = topology_sibling_cpumask(cpu);
|
|
for (sibl = -1; cpus_per_vec > 0; ) {
|
|
sibl = cpumask_next(sibl, siblmsk);
|
|
if (sibl >= nr_cpu_ids)
|
|
break;
|
|
if (!cpumask_test_and_clear_cpu(sibl, nmsk))
|
|
continue;
|
|
cpumask_set_cpu(sibl, irqmsk);
|
|
cpus_per_vec--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Set in @nodemsk every online node whose CPU mask intersects @mask and
 * return how many nodes were set by this call. @nodemsk is only added to,
 * never cleared here.
 */
static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
{
	int node, count = 0;

	for_each_online_node(node) {
		/* Skip nodes that share no CPU with the supplied mask */
		if (!cpumask_intersects(mask, cpumask_of_node(node)))
			continue;

		node_set(node, *nodemsk);
		count++;
	}

	return count;
}
|
|
|
|
/**
|
|
* irq_create_affinity_masks - Create affinity masks for multiqueue spreading
|
|
* @affinity: The affinity mask to spread. If NULL cpu_online_mask
|
|
* is used
|
|
* @nvecs: The number of vectors
|
|
*
|
|
* Returns the masks pointer or NULL if allocation failed.
|
|
*/
|
|
struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
|
|
int nvec)
|
|
{
|
|
int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
|
|
nodemask_t nodemsk = NODE_MASK_NONE;
|
|
struct cpumask *masks;
|
|
cpumask_var_t nmsk;
|
|
|
|
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
|
|
return NULL;
|
|
|
|
masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
|
|
if (!masks)
|
|
goto out;
|
|
|
|
/* Stabilize the cpumasks */
|
|
get_online_cpus();
|
|
/* If the supplied affinity mask is NULL, use cpu online mask */
|
|
if (!affinity)
|
|
affinity = cpu_online_mask;
|
|
|
|
nodes = get_nodes_in_cpumask(affinity, &nodemsk);
|
|
|
|
/*
|
|
* If the number of nodes in the mask is greater than or equal the
|
|
* number of vectors we just spread the vectors across the nodes.
|
|
*/
|
|
if (nvec <= nodes) {
|
|
for_each_node_mask(n, nodemsk) {
|
|
cpumask_copy(masks + curvec, cpumask_of_node(n));
|
|
if (++curvec == nvec)
|
|
break;
|
|
}
|
|
goto outonl;
|
|
}
|
|
|
|
/* Spread the vectors per node */
|
|
vecs_per_node = nvec / nodes;
|
|
/* Account for rounding errors */
|
|
extra_vecs = nvec - (nodes * vecs_per_node);
|
|
|
|
for_each_node_mask(n, nodemsk) {
|
|
int ncpus, v, vecs_to_assign = vecs_per_node;
|
|
|
|
/* Get the cpus on this node which are in the mask */
|
|
cpumask_and(nmsk, affinity, cpumask_of_node(n));
|
|
|
|
/* Calculate the number of cpus per vector */
|
|
ncpus = cpumask_weight(nmsk);
|
|
|
|
for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
|
|
cpus_per_vec = ncpus / vecs_to_assign;
|
|
|
|
/* Account for extra vectors to compensate rounding errors */
|
|
if (extra_vecs) {
|
|
cpus_per_vec++;
|
|
if (!--extra_vecs)
|
|
vecs_per_node++;
|
|
}
|
|
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
|
|
}
|
|
|
|
if (curvec >= nvec)
|
|
break;
|
|
}
|
|
|
|
outonl:
|
|
put_online_cpus();
|
|
out:
|
|
free_cpumask_var(nmsk);
|
|
return masks;
|
|
}
|
|
|
|
/**
|
|
* irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
|
|
* @affinity: The affinity mask to spread. If NULL cpu_online_mask
|
|
* is used
|
|
* @maxvec: The maximum number of vectors available
|
|
*/
|
|
int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
|
|
{
|
|
int cpus, ret;
|
|
|
|
/* Stabilize the cpumasks */
|
|
get_online_cpus();
|
|
/* If the supplied affinity mask is NULL, use cpu online mask */
|
|
if (!affinity)
|
|
affinity = cpu_online_mask;
|
|
|
|
cpus = cpumask_weight(affinity);
|
|
ret = (cpus < maxvec) ? cpus : maxvec;
|
|
|
|
put_online_cpus();
|
|
return ret;
|
|
}
|