49bd97c28b
Use the dedicated non-atomic helpers for {clear,set}_bit() and their test variants, i.e. the double-underscore versions. Despite being defined in atomic.h, and despite the kernel versions being atomic in the kernel, tools' {clear,set}_bit() helpers aren't actually atomic. Move to the double-underscore versions so that the versions that are expected to be atomic (for kernel developers) can be made atomic without affecting users that don't want atomic operations. No functional change intended. Signed-off-by: Sean Christopherson <seanjc@google.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Cc: James Morse <james.morse@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Marc Zyngier <maz@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Oliver Upton <oliver.upton@linux.dev> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk> Cc: Sean Christopherson <seanjc@google.com> Cc: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Yury Norov <yury.norov@gmail.com> Cc: alexandru elisei <alexandru.elisei@arm.com> Cc: kvm@vger.kernel.org Cc: kvmarm@lists.cs.columbia.edu Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20221119013450.2643007-6-seanjc@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
86 lines
2.0 KiB
C
86 lines
2.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Manage affinity to optimize IPIs inside the kernel perf API. */
|
|
#define _GNU_SOURCE 1
|
|
#include <sched.h>
|
|
#include <stdlib.h>
|
|
#include <linux/bitmap.h>
|
|
#include <linux/zalloc.h>
|
|
#include "perf.h"
|
|
#include "cpumap.h"
|
|
#include "affinity.h"
|
|
|
|
static int get_cpu_set_size(void)
|
|
{
|
|
int sz = cpu__max_cpu().cpu + 8 - 1;
|
|
/*
|
|
* sched_getaffinity doesn't like masks smaller than the kernel.
|
|
* Hopefully that's big enough.
|
|
*/
|
|
if (sz < 4096)
|
|
sz = 4096;
|
|
return sz / 8;
|
|
}
|
|
|
|
int affinity__setup(struct affinity *a)
|
|
{
|
|
int cpu_set_size = get_cpu_set_size();
|
|
|
|
a->orig_cpus = bitmap_zalloc(cpu_set_size * 8);
|
|
if (!a->orig_cpus)
|
|
return -1;
|
|
sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
|
|
a->sched_cpus = bitmap_zalloc(cpu_set_size * 8);
|
|
if (!a->sched_cpus) {
|
|
zfree(&a->orig_cpus);
|
|
return -1;
|
|
}
|
|
bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size);
|
|
a->changed = false;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* perf_event_open does an IPI internally to the target CPU.
|
|
* It is more efficient to change perf's affinity to the target
|
|
* CPU and then set up all events on that CPU, so we amortize
|
|
* CPU communication.
|
|
*/
|
|
void affinity__set(struct affinity *a, int cpu)
|
|
{
|
|
int cpu_set_size = get_cpu_set_size();
|
|
|
|
/*
|
|
* Return:
|
|
* - if cpu is -1
|
|
* - restrict out of bound access to sched_cpus
|
|
*/
|
|
if (cpu == -1 || ((cpu >= (cpu_set_size * 8))))
|
|
return;
|
|
|
|
a->changed = true;
|
|
__set_bit(cpu, a->sched_cpus);
|
|
/*
|
|
* We ignore errors because affinity is just an optimization.
|
|
* This could happen for example with isolated CPUs or cpusets.
|
|
* In this case the IPIs inside the kernel's perf API still work.
|
|
*/
|
|
sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
|
|
__clear_bit(cpu, a->sched_cpus);
|
|
}
|
|
|
|
static void __affinity__cleanup(struct affinity *a)
|
|
{
|
|
int cpu_set_size = get_cpu_set_size();
|
|
|
|
if (a->changed)
|
|
sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
|
|
zfree(&a->sched_cpus);
|
|
zfree(&a->orig_cpus);
|
|
}
|
|
|
|
/*
 * NULL-tolerant wrapper around __affinity__cleanup(): does nothing when
 * @a is NULL, otherwise restores the original affinity (if it was changed)
 * and frees the bitmaps held by @a.
 */
void affinity__cleanup(struct affinity *a)
{
	if (a == NULL)
		return;

	__affinity__cleanup(a);
}
|