x86/apic related update:
- A single commit which reduces cacheline misses in __x2apic_send_IPI_mask() significantly by converting x86_cpu_to_logical_apicid() to an array instead of using per CPU storage. This reduces the cost for a full broadcast on a dual socket system with 256 CPUs from 33 down to 11 microseconds. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmF/Ia8THHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoeL1D/0fna8gVHWaVGUexB7HDF7G+WTuTnl0 7A7n7Yt3fqNirxfxgPtWpWHSbJfhB1jSxbjEirTOCDHGC47au6Hh1HICtpRqOMoY oH5ZXpDdiQnpl3MnGys6J5StVZCyWPz/pIIqmqO99fd5Ykolbf9UNjXiBW+1jBfO dS7+av9dAhTbVnbJfTDR5fhl81nM04eyVIaMn7rq3Z6VDfFFaugvv/HmpzfBRwxC o/hiN/e6T3DMtz2zXOXxrw+xdBp62sAhlrx1FDB3OYVcBb1cVK1gWTMoY+GOUd5y 5SiDoklzZXp4s/MT1qBxRvo7PRcL6SwwWtePBgYCL1oLfHE5Ctx/xu1jhfrH15IT jgykscSsyX4cRZfNXZcFLu8/EZmD/hT5xRQn4VkG2gnjm/SJ/U5m7cBOF8YWdiKs YRChHnpHN1fvxsKtLw3ZtgNT9HgqvIl4AvhweNe64fHFuQNefxVgvNAxgnNe03dr OGMbBPoX8AfSr2cxkhnprByMbcLa7aYAyAu0WkbrVar8ZB4uOPApnnIfMSvYgRtn 0pP9G+kyRQMc57Wq0NzQWLnhgbQuZlAo5zzvVjju0FGUjFDWOG3G6I58AqsgYA0i YDbBIOSVCnXVCFFV7i99M8U2Z4n4Cj4Paf8UjRFi6H9164AyVYVZ0kMBQqX2Luj1 8UYlESTByyO7Xw== =7l4b -----END PGP SIGNATURE----- Merge tag 'x86-apic-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86/apic update from Thomas Gleixner: "A single commit which reduces cache misses in __x2apic_send_IPI_mask() significantly by converting x86_cpu_to_logical_apicid() to an array instead of using per CPU storage. This reduces the cost for a full broadcast on a dual socket system with 256 CPUs from 33 down to 11 microseconds" * tag 'x86-apic-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/apic: Reduce cache line misses in __x2apic_send_IPI_mask()
This commit is contained in:
commit
7d20dd3294
@ -15,9 +15,15 @@ struct cluster_mask {
|
||||
struct cpumask mask;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
|
||||
/*
|
||||
* __x2apic_send_IPI_mask() possibly needs to read
|
||||
* x86_cpu_to_logical_apicid for all online CPUs sequentially.
|
||||
* Using a per-CPU variable would cost one cache line per CPU.
|
||||
*/
|
||||
static u32 *x86_cpu_to_logical_apicid __read_mostly;
|
||||
|
||||
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
|
||||
static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
|
||||
static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
|
||||
static struct cluster_mask *cluster_hotplug_mask;
|
||||
|
||||
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
|
||||
@ -27,7 +33,7 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
|
||||
|
||||
static void x2apic_send_IPI(int cpu, int vector)
|
||||
{
|
||||
u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
|
||||
u32 dest = x86_cpu_to_logical_apicid[cpu];
|
||||
|
||||
/* x2apic MSRs are special and need a special fence: */
|
||||
weak_wrmsr_fence();
|
||||
@ -58,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
|
||||
|
||||
dest = 0;
|
||||
for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
|
||||
dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
|
||||
dest |= x86_cpu_to_logical_apicid[clustercpu];
|
||||
|
||||
if (!dest)
|
||||
continue;
|
||||
@ -94,7 +100,7 @@ static void x2apic_send_IPI_all(int vector)
|
||||
|
||||
static u32 x2apic_calc_apicid(unsigned int cpu)
|
||||
{
|
||||
return per_cpu(x86_cpu_to_logical_apicid, cpu);
|
||||
return x86_cpu_to_logical_apicid[cpu];
|
||||
}
|
||||
|
||||
static void init_x2apic_ldr(void)
|
||||
@ -103,7 +109,7 @@ static void init_x2apic_ldr(void)
|
||||
u32 cluster, apicid = apic_read(APIC_LDR);
|
||||
unsigned int cpu;
|
||||
|
||||
this_cpu_write(x86_cpu_to_logical_apicid, apicid);
|
||||
x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
|
||||
|
||||
if (cmsk)
|
||||
goto update;
|
||||
@ -166,12 +172,21 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
|
||||
|
||||
static int x2apic_cluster_probe(void)
|
||||
{
|
||||
u32 slots;
|
||||
|
||||
if (!x2apic_mode)
|
||||
return 0;
|
||||
|
||||
slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
|
||||
x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
|
||||
if (!x86_cpu_to_logical_apicid)
|
||||
return 0;
|
||||
|
||||
if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
|
||||
x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
|
||||
pr_err("Failed to register X2APIC_PREPARE\n");
|
||||
kfree(x86_cpu_to_logical_apicid);
|
||||
x86_cpu_to_logical_apicid = NULL;
|
||||
return 0;
|
||||
}
|
||||
init_x2apic_ldr();
|
||||
|
Loading…
Reference in New Issue
Block a user