kernel/fork: group allocation/free of per-cpu counters for mm struct
A trivial execve scalability test which tries to be very friendly (statically linked binaries, all separate) is predominantly bottlenecked by back-to-back per-cpu counter allocations which serialize on global locks. Ease the pain by allocating and freeing them in one go. Bench can be found here: http://apollo.backplane.com/DFlyMisc/doexec.c $ cc -static -O2 -o static-doexec doexec.c $ ./static-doexec $(nproc) Even at a very modest scale of 26 cores (ops/s): before: 133543.63 after: 186061.81 (+39%) While with the patch these allocations remain a significant problem, the primary bottleneck shifts to page release handling. Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> Link: https://lore.kernel.org/r/20230823050609.2228718-3-mjguzik@gmail.com [Dennis: reflowed 1 line] Signed-off-by: Dennis Zhou <dennis@kernel.org>
This commit is contained in:
parent
c439d5e8a0
commit
14ef95be6f
@ -909,8 +909,6 @@ static void cleanup_lazy_tlbs(struct mm_struct *mm)
|
||||
*/
|
||||
void __mmdrop(struct mm_struct *mm)
|
||||
{
|
||||
int i;
|
||||
|
||||
BUG_ON(mm == &init_mm);
|
||||
WARN_ON_ONCE(mm == current->mm);
|
||||
|
||||
@ -925,9 +923,8 @@ void __mmdrop(struct mm_struct *mm)
|
||||
put_user_ns(mm->user_ns);
|
||||
mm_pasid_drop(mm);
|
||||
mm_destroy_cid(mm);
|
||||
percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
|
||||
|
||||
for (i = 0; i < NR_MM_COUNTERS; i++)
|
||||
percpu_counter_destroy(&mm->rss_stat[i]);
|
||||
free_mm(mm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__mmdrop);
|
||||
@ -1252,8 +1249,6 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
|
||||
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
struct user_namespace *user_ns)
|
||||
{
|
||||
int i;
|
||||
|
||||
mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
|
||||
mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
|
||||
atomic_set(&mm->mm_users, 1);
|
||||
@ -1301,17 +1296,15 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
if (mm_alloc_cid(mm))
|
||||
goto fail_cid;
|
||||
|
||||
for (i = 0; i < NR_MM_COUNTERS; i++)
|
||||
if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
|
||||
goto fail_pcpu;
|
||||
if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
|
||||
NR_MM_COUNTERS))
|
||||
goto fail_pcpu;
|
||||
|
||||
mm->user_ns = get_user_ns(user_ns);
|
||||
lru_gen_init_mm(mm);
|
||||
return mm;
|
||||
|
||||
fail_pcpu:
|
||||
while (i > 0)
|
||||
percpu_counter_destroy(&mm->rss_stat[--i]);
|
||||
mm_destroy_cid(mm);
|
||||
fail_cid:
|
||||
destroy_context(mm);
|
||||
|
Loading…
Reference in New Issue
Block a user