percpu_counter: add a cmpxchg-based _add_batch variant
Interrupt disable/enable trips are quite expensive on x86-64 compared to a mere cmpxchg (note: no lock prefix!) and percpu counters are used quite often. With this change I get a bump of 1% ops/s for negative path lookups, plugged into will-it-scale: void testcase(unsigned long long *iterations, unsigned long nr) { while (1) { int fd = open("/tmp/nonexistent", O_RDONLY); assert(fd == -1); (*iterations)++; } } The win would be higher if it was not for other slowdowns, but one has to start somewhere. Link: https://lkml.kernel.org/r/20240528204257.434817-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Dennis Zhou <dennis@kernel.org> Cc: Hugh Dickins <hughd@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
727759d748
commit
51d821654b
@ -73,17 +73,50 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
|
|||||||
EXPORT_SYMBOL(percpu_counter_set);
|
EXPORT_SYMBOL(percpu_counter_set);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* local_irq_save() is needed to make the function irq safe:
|
* Add to a counter while respecting batch size.
|
||||||
* - The slow path would be ok as protected by an irq-safe spinlock.
|
*
|
||||||
* - this_cpu_add would be ok as it is irq-safe by definition.
|
* There are 2 implementations, both dealing with the following problem:
|
||||||
* But:
|
*
|
||||||
* The decision slow path/fast path and the actual update must be atomic, too.
|
* The decision slow path/fast path and the actual update must be atomic.
|
||||||
* Otherwise a call in process context could check the current values and
|
* Otherwise a call in process context could check the current values and
|
||||||
* decide that the fast path can be used. If now an interrupt occurs before
|
* decide that the fast path can be used. If now an interrupt occurs before
|
||||||
* the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters),
|
* the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters),
|
||||||
* then the this_cpu_add() that is executed after the interrupt has completed
|
* then the this_cpu_add() that is executed after the interrupt has completed
|
||||||
* can produce values larger than "batch" or even overflows.
|
* can produce values larger than "batch" or even overflows.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
|
||||||
|
/*
|
||||||
|
* Safety against interrupts is achieved in 2 ways:
|
||||||
|
* 1. the fast path uses local cmpxchg (note: no lock prefix)
|
||||||
|
* 2. the slow path operates with interrupts disabled
|
||||||
|
*/
|
||||||
|
/*
 * percpu_counter_add_batch - add @amount to @fbc, folding at @batch.
 *
 * This is the variant built when CONFIG_HAVE_CMPXCHG_LOCAL is defined.
 *
 * @fbc:    counter to update; uses fbc->counters (per-CPU part),
 *          fbc->count (shared total) and fbc->lock.
 * @amount: signed delta to add.
 * @batch:  once abs(per-CPU value + @amount) reaches this threshold the
 *          per-CPU value and @amount are folded into fbc->count.
 *
 * Fast path: install "count + amount" into this CPU's counter with
 * this_cpu_try_cmpxchg(), without taking fbc->lock.
 * Slow path: under fbc->lock with interrupts saved/disabled, move the
 * per-CPU value plus @amount into fbc->count and return.
 */
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
|
||||||
|
{
|
||||||
|
s64 count;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
/* Snapshot this CPU's counter; used as the expected value for the cmpxchg below. */
count = this_cpu_read(*fbc->counters);
|
||||||
|
do {
|
||||||
|
/* Threshold check on the would-be new per-CPU value; exceeding it takes the locked slow path. */
if (unlikely(abs(count + amount) >= batch)) {
|
||||||
|
raw_spin_lock_irqsave(&fbc->lock, flags);
|
||||||
|
/*
|
||||||
|
* Note: by now we might have migrated to another CPU
|
||||||
|
* or the value might have changed.
|
||||||
|
*/
|
||||||
|
/* Hence re-read the per-CPU value now that the lock is held and irqs are off. */
count = __this_cpu_read(*fbc->counters);
|
||||||
|
/* Fold the per-CPU value and the new delta into the shared total ... */
fbc->count += count + amount;
|
||||||
|
/* ... and remove the folded part from the per-CPU counter. */
__this_cpu_sub(*fbc->counters, count);
|
||||||
|
raw_spin_unlock_irqrestore(&fbc->lock, flags);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/*
 * Retry until the per-CPU counter is swapped from "count" to
 * "count + amount". The loop never re-reads the counter itself, so
 * a retry relies on this_cpu_try_cmpxchg() storing the observed
 * value into "count" on failure.
 * NOTE(review): usual try_cmpxchg convention - confirm.
 */
} while (!this_cpu_try_cmpxchg(*fbc->counters, &count, count + amount));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* local_irq_save() is used to make the function irq safe:
|
||||||
|
* - The slow path would be ok as protected by an irq-safe spinlock.
|
||||||
|
* - this_cpu_add would be ok as it is irq-safe by definition.
|
||||||
|
*/
|
||||||
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
|
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
|
||||||
{
|
{
|
||||||
s64 count;
|
s64 count;
|
||||||
@ -101,6 +134,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
|
|||||||
}
|
}
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
EXPORT_SYMBOL(percpu_counter_add_batch);
|
EXPORT_SYMBOL(percpu_counter_add_batch);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user