x86: remove irqbalance in kernel for 32 bit
This has been deprecated for years; the user-space irqbalance daemon works better with NUMA, has configurable policies, etc. Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
6d50bc2683
commit
8b8e8c1bf7
@ -1254,14 +1254,6 @@ config EFI
|
||||
resultant kernel should continue to boot on existing non-EFI
|
||||
platforms.
|
||||
|
||||
# In-kernel IRQ load balancing; selectable only on 32-bit SMP kernels
# built with IO-APIC support, and enabled by default there.
config IRQBALANCE
	bool "Enable kernel irq balancing"
	default y
	depends on X86_32 && SMP && X86_IO_APIC
	help
	  The default yes will allow the kernel to do irq load balancing.
	  Saying no will keep the kernel from doing irq load balancing.
|
||||
|
||||
config SECCOMP
|
||||
def_bool y
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
|
@ -287,7 +287,6 @@ CONFIG_MTRR=y
|
||||
# CONFIG_MTRR_SANITIZER is not set
|
||||
CONFIG_X86_PAT=y
|
||||
CONFIG_EFI=y
|
||||
# CONFIG_IRQBALANCE is not set
|
||||
CONFIG_SECCOMP=y
|
||||
# CONFIG_HZ_100 is not set
|
||||
# CONFIG_HZ_250 is not set
|
||||
|
@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
|
||||
spin_unlock_irqrestore(&ioapic_lock, flags);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IRQBALANCE)
|
||||
# include <asm/processor.h> /* kernel_thread() */
|
||||
# include <linux/kernel_stat.h> /* kstat */
|
||||
# include <linux/slab.h> /* kmalloc() */
|
||||
# include <linux/timer.h>
|
||||
|
||||
#define IRQBALANCE_CHECK_ARCH -999
|
||||
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
|
||||
#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
|
||||
#define BALANCED_IRQ_MORE_DELTA (HZ/10)
|
||||
#define BALANCED_IRQ_LESS_DELTA (HZ)
|
||||
|
||||
static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
|
||||
static int physical_balance __read_mostly;
|
||||
static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
|
||||
|
||||
/*
 * Per-CPU bookkeeping used by the in-kernel IRQ balancer, one slot per
 * possible CPU number.  The fields are reached through the CPU_IRQ(),
 * LAST_CPU_IRQ() and IRQ_DELTA() accessor macros.
 */
static struct irq_cpu_info {
	/* Per-IRQ array: interrupt count seen on the previous balancing pass. */
	unsigned long *last_irq;
	/* Per-IRQ array: load accumulated for this CPU/package in the current pass. */
	unsigned long *irq_delta;
	/* Total load attributed to this CPU in the current pass. */
	unsigned long irq;
} irq_cpu_data[NR_CPUS];
|
||||
|
||||
#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
|
||||
#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
|
||||
#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
|
||||
|
||||
#define IDLE_ENOUGH(cpu,now) \
|
||||
(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
|
||||
|
||||
#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
|
||||
|
||||
#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
|
||||
|
||||
static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
|
||||
|
||||
static cpumask_t *balance_irq_affinity;
|
||||
|
||||
|
||||
static void __init irq_affinity_init_work(void *data)
|
||||
{
|
||||
struct dyn_array *da = data;
|
||||
|
||||
int i;
|
||||
struct balance_irq_affinity *affinity;
|
||||
|
||||
affinity = *da->name;
|
||||
|
||||
for (i = 0; i < *da->nr; i++)
|
||||
memcpy(&affinity[i], &balance_irq_affinity_init,
|
||||
sizeof(struct balance_irq_affinity));
|
||||
|
||||
}
|
||||
|
||||
DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
|
||||
|
||||
|
||||
/*
 * Remember @mask as the set of CPUs the in-kernel balancer may pick
 * from for @irq.  The mask is stored in balance_irq_affinity[irq];
 * @irq is not range-checked here.
 */
void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
	cpumask_t *slot = &balance_irq_affinity[irq];

	*slot = mask;
}
|
||||
|
||||
/*
 * Pick the next CPU, stepping away from @curr_cpu in @direction, that is
 * online and present in @allowed_mask.
 *
 * @curr_cpu:     CPU to start stepping from; the first candidate examined
 *                is its neighbour, not @curr_cpu itself.
 * @allowed_mask: CPUs that may be chosen.
 * @now:          timestamp passed on to IDLE_ENOUGH().
 * @direction:    1 steps upward and wraps to 0 at NR_CPUS; any other value
 *                steps downward and wraps to NR_CPUS - 1 below 0.
 *
 * Candidates must also satisfy IDLE_ENOUGH() until a rejected candidate
 * equals @curr_cpu; after that the idle requirement is dropped.
 *
 * Returns the selected CPU number.
 */
static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
			unsigned long now, int direction)
{
	int want_idle = 1;
	int candidate = curr_cpu;

	for (;;) {
		/* Advance one step, wrapping at either end of the CPU range. */
		if (direction == 1) {
			if (++candidate >= NR_CPUS)
				candidate = 0;
		} else {
			if (--candidate == -1)
				candidate = NR_CPUS - 1;
		}

		if (cpu_online(candidate) &&
		    IRQ_ALLOWED(candidate, allowed_mask) &&
		    (!want_idle || IDLE_ENOUGH(candidate, now)))
			break;

		/* Back at the starting CPU without a match: accept busy CPUs too. */
		if (unlikely(candidate == curr_cpu))
			want_idle = 0;
	}

	return candidate;
}
|
||||
|
||||
static inline void balance_irq(int cpu, int irq)
|
||||
{
|
||||
unsigned long now = jiffies;
|
||||
cpumask_t allowed_mask;
|
||||
unsigned int new_cpu;
|
||||
|
||||
if (irqbalance_disabled)
|
||||
return;
|
||||
|
||||
cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
|
||||
new_cpu = move(cpu, allowed_mask, now, 1);
|
||||
if (cpu != new_cpu)
|
||||
set_pending_irq(irq, cpumask_of_cpu(new_cpu));
|
||||
}
|
||||
|
||||
static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
|
||||
{
|
||||
int i, j;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
for (j = 0; j < nr_irqs; j++) {
|
||||
desc = irq_to_desc(j);
|
||||
if (!desc->action)
|
||||
continue;
|
||||
/* Is it a significant load ? */
|
||||
if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
|
||||
useful_load_threshold)
|
||||
continue;
|
||||
balance_irq(i, j);
|
||||
}
|
||||
}
|
||||
balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
|
||||
balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
||||
return;
|
||||
}
|
||||
|
||||
static void do_irq_balance(void)
|
||||
{
|
||||
int i, j;
|
||||
unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
|
||||
unsigned long move_this_load = 0;
|
||||
int max_loaded = 0, min_loaded = 0;
|
||||
int load;
|
||||
unsigned long useful_load_threshold = balanced_irq_interval + 10;
|
||||
int selected_irq;
|
||||
int tmp_loaded, first_attempt = 1;
|
||||
unsigned long tmp_cpu_irq;
|
||||
unsigned long imbalance = 0;
|
||||
cpumask_t allowed_mask, target_cpu_mask, tmp;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
int package_index;
|
||||
CPU_IRQ(i) = 0;
|
||||
if (!cpu_online(i))
|
||||
continue;
|
||||
package_index = CPU_TO_PACKAGEINDEX(i);
|
||||
for (j = 0; j < nr_irqs; j++) {
|
||||
unsigned long value_now, delta;
|
||||
/* Is this an active IRQ or balancing disabled ? */
|
||||
desc = irq_to_desc(j);
|
||||
if (!desc->action || irq_balancing_disabled(j))
|
||||
continue;
|
||||
if (package_index == i)
|
||||
IRQ_DELTA(package_index, j) = 0;
|
||||
/* Determine the total count per processor per IRQ */
|
||||
value_now = (unsigned long) kstat_irqs_cpu(j, i);
|
||||
|
||||
/* Determine the activity per processor per IRQ */
|
||||
delta = value_now - LAST_CPU_IRQ(i, j);
|
||||
|
||||
/* Update last_cpu_irq[][] for the next time */
|
||||
LAST_CPU_IRQ(i, j) = value_now;
|
||||
|
||||
/* Ignore IRQs whose rate is less than the clock */
|
||||
if (delta < useful_load_threshold)
|
||||
continue;
|
||||
/* update the load for the processor or package total */
|
||||
IRQ_DELTA(package_index, j) += delta;
|
||||
|
||||
/* Keep track of the higher numbered sibling as well */
|
||||
if (i != package_index)
|
||||
CPU_IRQ(i) += delta;
|
||||
/*
|
||||
* We have sibling A and sibling B in the package
|
||||
*
|
||||
* cpu_irq[A] = load for cpu A + load for cpu B
|
||||
* cpu_irq[B] = load for cpu B
|
||||
*/
|
||||
CPU_IRQ(package_index) += delta;
|
||||
}
|
||||
}
|
||||
/* Find the least loaded processor package */
|
||||
for_each_online_cpu(i) {
|
||||
if (i != CPU_TO_PACKAGEINDEX(i))
|
||||
continue;
|
||||
if (min_cpu_irq > CPU_IRQ(i)) {
|
||||
min_cpu_irq = CPU_IRQ(i);
|
||||
min_loaded = i;
|
||||
}
|
||||
}
|
||||
max_cpu_irq = ULONG_MAX;
|
||||
|
||||
tryanothercpu:
|
||||
/*
|
||||
* Look for heaviest loaded processor.
|
||||
* We may come back to get the next heaviest loaded processor.
|
||||
* Skip processors with trivial loads.
|
||||
*/
|
||||
tmp_cpu_irq = 0;
|
||||
tmp_loaded = -1;
|
||||
for_each_online_cpu(i) {
|
||||
if (i != CPU_TO_PACKAGEINDEX(i))
|
||||
continue;
|
||||
if (max_cpu_irq <= CPU_IRQ(i))
|
||||
continue;
|
||||
if (tmp_cpu_irq < CPU_IRQ(i)) {
|
||||
tmp_cpu_irq = CPU_IRQ(i);
|
||||
tmp_loaded = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (tmp_loaded == -1) {
|
||||
/*
|
||||
* In the case of small number of heavy interrupt sources,
|
||||
* loading some of the cpus too much. We use Ingo's original
|
||||
* approach to rotate them around.
|
||||
*/
|
||||
if (!first_attempt && imbalance >= useful_load_threshold) {
|
||||
rotate_irqs_among_cpus(useful_load_threshold);
|
||||
return;
|
||||
}
|
||||
goto not_worth_the_effort;
|
||||
}
|
||||
|
||||
first_attempt = 0; /* heaviest search */
|
||||
max_cpu_irq = tmp_cpu_irq; /* load */
|
||||
max_loaded = tmp_loaded; /* processor */
|
||||
imbalance = (max_cpu_irq - min_cpu_irq) / 2;
|
||||
|
||||
/*
|
||||
* if imbalance is less than approx 10% of max load, then
|
||||
* observe diminishing returns action. - quit
|
||||
*/
|
||||
if (imbalance < (max_cpu_irq >> 3))
|
||||
goto not_worth_the_effort;
|
||||
|
||||
tryanotherirq:
|
||||
/* if we select an IRQ to move that can't go where we want, then
|
||||
* see if there is another one to try.
|
||||
*/
|
||||
move_this_load = 0;
|
||||
selected_irq = -1;
|
||||
for (j = 0; j < nr_irqs; j++) {
|
||||
/* Is this an active IRQ? */
|
||||
desc = irq_to_desc(j);
|
||||
if (!desc->action)
|
||||
continue;
|
||||
if (imbalance <= IRQ_DELTA(max_loaded, j))
|
||||
continue;
|
||||
/* Try to find the IRQ that is closest to the imbalance
|
||||
* without going over.
|
||||
*/
|
||||
if (move_this_load < IRQ_DELTA(max_loaded, j)) {
|
||||
move_this_load = IRQ_DELTA(max_loaded, j);
|
||||
selected_irq = j;
|
||||
}
|
||||
}
|
||||
if (selected_irq == -1)
|
||||
goto tryanothercpu;
|
||||
|
||||
imbalance = move_this_load;
|
||||
|
||||
/* For physical_balance case, we accumulated both load
|
||||
* values in the one of the siblings cpu_irq[],
|
||||
* to use the same code for physical and logical processors
|
||||
* as much as possible.
|
||||
*
|
||||
* NOTE: the cpu_irq[] array holds the sum of the load for
|
||||
* sibling A and sibling B in the slot for the lowest numbered
|
||||
* sibling (A), _AND_ the load for sibling B in the slot for
|
||||
* the higher numbered sibling.
|
||||
*
|
||||
* We seek the least loaded sibling by making the comparison
|
||||
* (A+B)/2 vs B
|
||||
*/
|
||||
load = CPU_IRQ(min_loaded) >> 1;
|
||||
for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
|
||||
if (load > CPU_IRQ(j)) {
|
||||
/* This won't change cpu_sibling_map[min_loaded] */
|
||||
load = CPU_IRQ(j);
|
||||
min_loaded = j;
|
||||
}
|
||||
}
|
||||
|
||||
cpus_and(allowed_mask,
|
||||
cpu_online_map,
|
||||
balance_irq_affinity[selected_irq]);
|
||||
target_cpu_mask = cpumask_of_cpu(min_loaded);
|
||||
cpus_and(tmp, target_cpu_mask, allowed_mask);
|
||||
|
||||
if (!cpus_empty(tmp)) {
|
||||
/* mark for change destination */
|
||||
set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
|
||||
|
||||
/* Since we made a change, come back sooner to
|
||||
* check for more variation.
|
||||
*/
|
||||
balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
|
||||
balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
||||
return;
|
||||
}
|
||||
goto tryanotherirq;
|
||||
|
||||
not_worth_the_effort:
|
||||
/*
|
||||
* if we did not find an IRQ to move, then adjust the time interval
|
||||
* upward
|
||||
*/
|
||||
balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
|
||||
balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
|
||||
return;
|
||||
}
|
||||
|
||||
static int balanced_irq(void *unused)
|
||||
{
|
||||
int i;
|
||||
unsigned long prev_balance_time = jiffies;
|
||||
long time_remaining = balanced_irq_interval;
|
||||
struct irq_desc *desc;
|
||||
|
||||
/* push everything to CPU 0 to give us a starting point. */
|
||||
for (i = 0 ; i < nr_irqs ; i++) {
|
||||
desc = irq_to_desc(i);
|
||||
desc->pending_mask = cpumask_of_cpu(0);
|
||||
set_pending_irq(i, cpumask_of_cpu(0));
|
||||
}
|
||||
|
||||
set_freezable();
|
||||
for ( ; ; ) {
|
||||
time_remaining = schedule_timeout_interruptible(time_remaining);
|
||||
try_to_freeze();
|
||||
if (time_after(jiffies,
|
||||
prev_balance_time+balanced_irq_interval)) {
|
||||
preempt_disable();
|
||||
do_irq_balance();
|
||||
prev_balance_time = jiffies;
|
||||
time_remaining = balanced_irq_interval;
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Late initcall that sets up the in-kernel IRQ balancer.
 *
 * - If the command line did not decide (irqbalance_disabled still equals
 *   IRQBALANCE_CHECK_ARCH), take the setting from NO_BALANCE_IRQ.
 * - Balancing is switched off when fewer than two CPUs are online.
 * - physical_balance is set when smp_num_siblings > 1 and the online map
 *   shifted right by two is non-empty.
 * - Allocates the zeroed per-IRQ irq_delta/last_irq arrays for every
 *   online CPU and starts the "kirqd" thread.
 *
 * On allocation or thread-creation failure an error is logged and all
 * per-CPU arrays are freed and reset to NULL.
 *
 * Always returns 0, including on failure.
 */
static int __init balanced_irq_init(void)
{
	int i;
	cpumask_t tmp;

	cpus_shift_right(tmp, cpu_online_map, 2);

	/* When not overwritten by the command line ask subarchitecture. */
	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
		irqbalance_disabled = NO_BALANCE_IRQ;
	if (irqbalance_disabled)
		return 0;

	/* disable irqbalance completely if there is only one processor online */
	if (num_online_cpus() < 2) {
		irqbalance_disabled = 1;
		return 0;
	}

	/*
	 * Enable physical balance only if more than 1 physical processor
	 * is present
	 */
	if (smp_num_siblings > 1 && !cpus_empty(tmp))
		physical_balance = 1;

	for_each_online_cpu(i) {
		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
			/* Terminate the message so it is not merged with the next log line. */
			printk(KERN_ERR "balanced_irq_init: out of memory\n");
			goto failed;
		}
	}

	printk(KERN_INFO "Starting balanced_irq\n");
	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
		return 0;
	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");

failed:
	/* kfree(NULL) is a no-op, so never-allocated slots are handled too. */
	for_each_possible_cpu(i) {
		kfree(irq_cpu_data[i].irq_delta);
		irq_cpu_data[i].irq_delta = NULL;
		kfree(irq_cpu_data[i].last_irq);
		irq_cpu_data[i].last_irq = NULL;
	}
	return 0;
}
|
||||
|
||||
/*
 * Turn the in-kernel IRQ balancer off by setting irqbalance_disabled.
 *
 * @str: ignored.
 *
 * Always returns 1.
 */
int __devinit irqbalance_disable(char *str)
{
	irqbalance_disabled = 1;

	return 1;
}
|
||||
|
||||
__setup("noirqbalance", irqbalance_disable);
|
||||
|
||||
late_initcall(balanced_irq_init);
|
||||
#endif /* CONFIG_IRQBALANCE */
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifndef CONFIG_SMP
|
||||
|
@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
|
||||
if (!(word & (1 << 13))) {
|
||||
dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
|
||||
"disabling irq balancing and affinity\n");
|
||||
#ifdef CONFIG_IRQBALANCE
|
||||
irqbalance_disable("");
|
||||
#endif
|
||||
noirqdebug_setup("");
|
||||
#ifdef CONFIG_PROC_FS
|
||||
no_irq_affinity = 1;
|
||||
|
@ -185,7 +185,7 @@ struct irq_desc {
|
||||
cpumask_t affinity;
|
||||
unsigned int cpu;
|
||||
#endif
|
||||
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
cpumask_t pending_mask;
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_FS
|
||||
@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
|
||||
void set_pending_irq(unsigned int irq, cpumask_t mask);
|
||||
void move_native_irq(int irq);
|
||||
void move_masked_irq(int irq);
|
||||
|
||||
#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
|
||||
#else /* CONFIG_GENERIC_PENDING_IRQ */
|
||||
|
||||
static inline void move_irq(int irq)
|
||||
{
|
||||
@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_IRQBALANCE
|
||||
extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
|
||||
#else
|
||||
static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
extern int no_irq_affinity;
|
||||
|
||||
static inline int irq_balancing_disabled(unsigned int irq)
|
||||
|
@ -86,8 +86,6 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
|
||||
if (!desc->chip->set_affinity)
|
||||
return -EINVAL;
|
||||
|
||||
set_balance_irq_affinity(irq, cpumask);
|
||||
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
if (desc->status & IRQ_MOVE_PCNTXT) {
|
||||
unsigned long flags;
|
||||
@ -122,7 +120,6 @@ int irq_select_affinity(unsigned int irq)
|
||||
desc->affinity = mask;
|
||||
desc->chip->set_affinity(irq, mask);
|
||||
|
||||
set_balance_irq_affinity(irq, mask);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user