23d72bfd8f
Consolidate the mux and demux of ipi messages into smp.c and call a new smp_ops callback to actually trigger the ipi. The powerpc architecture code is optimised for having 4 distinct ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi single, scheduler ipi, and enter debugger). However, several interrupt controllers only provide a single software triggered interrupt that can be delivered to each cpu. To resolve this limitation, each smp_ops implementation created a per-cpu variable that is manipulated with atomic bitops. Since these lines will be contended they are optimally marked as shared_aligned and take a full cache line for each cpu. Distro kernels may have 2 or 3 of these in their config, each taking per-cpu space even though at most one will be in use. This consolidation removes smp_message_recv and replaces the single call action cases with direct calls from the common message recognition loop. The complicated debugger ipi case with its muxed crash handling code is moved to debug_ipi_action which is now called from the demux code (instead of the multi-message action calling smp_message_recv). I put a call to reschedule_action to increase the likelihood of correctly merging the anticipated scheduler_ipi() hook coming from the scheduler tree; that single required call can be inlined later. The actual message decode is a copy of the old pseries xics code with its memory barriers and cache line spacing, augmented with a per-cpu unsigned long based on the book-e doorbell code. The optional data is set via a callback from the implementation and is passed to the new cause-ipi hook along with the logical cpu number. While currently only the doorbell implementation uses this data it should be almost zero cost to retrieve and pass it -- it adds a single register load for the argument from the same cache line to which we just completed a store and the register is dead on return from the call.
I extended the data element from unsigned int to unsigned long in case some other code wanted to associate a pointer. The doorbell check_self is replaced by a call to smp_muxed_ipi_resend, conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed to CONFIG_SMP but I left it with BOOKE for now. Also, the doorbell interrupt vector for book-e was not calling irq_enter and irq_exit, which throws off cpu accounting and causes code to not realize it is running in interrupt context. Add the missing calls. Signed-off-by: Milton Miller <miltonm@bga.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
261 lines
6.3 KiB
C
261 lines
6.3 KiB
C
/*
|
|
* SMP support for pSeries machines.
|
|
*
|
|
* Dave Engebretsen, Peter Bergner, and
|
|
* Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
|
|
*
|
|
* Plus various changes from other IBM teams...
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/init.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/cache.h>
|
|
#include <linux/err.h>
|
|
#include <linux/sysdev.h>
|
|
#include <linux/cpu.h>
|
|
|
|
#include <asm/ptrace.h>
|
|
#include <asm/atomic.h>
|
|
#include <asm/irq.h>
|
|
#include <asm/page.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/io.h>
|
|
#include <asm/prom.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/paca.h>
|
|
#include <asm/machdep.h>
|
|
#include <asm/cputable.h>
|
|
#include <asm/firmware.h>
|
|
#include <asm/system.h>
|
|
#include <asm/rtas.h>
|
|
#include <asm/pSeries_reconfig.h>
|
|
#include <asm/mpic.h>
|
|
#include <asm/vdso_datapage.h>
|
|
#include <asm/cputhreads.h>
|
|
#include <asm/mpic.h>
|
|
#include <asm/xics.h>
|
|
|
|
#include "plpar_wrappers.h"
|
|
#include "pseries.h"
|
|
#include "offline_states.h"
|
|
|
|
|
|
/*
|
|
* The Primary thread of each non-boot processor was started from the OF client
|
|
* interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
|
|
*/
|
|
static cpumask_var_t of_spin_mask;
|
|
|
|
/* Query where a cpu is now. Return codes #defined in plpar_wrappers.h */
|
|
int smp_query_cpu_stopped(unsigned int pcpu)
|
|
{
|
|
int cpu_status, status;
|
|
int qcss_tok = rtas_token("query-cpu-stopped-state");
|
|
|
|
if (qcss_tok == RTAS_UNKNOWN_SERVICE) {
|
|
printk_once(KERN_INFO
|
|
"Firmware doesn't support query-cpu-stopped-state\n");
|
|
return QCSS_HARDWARE_ERROR;
|
|
}
|
|
|
|
status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
|
|
if (status != 0) {
|
|
printk(KERN_ERR
|
|
"RTAS query-cpu-stopped-state failed: %i\n", status);
|
|
return status;
|
|
}
|
|
|
|
return cpu_status;
|
|
}
|
|
|
|
/**
|
|
* smp_startup_cpu() - start the given cpu
|
|
*
|
|
* At boot time, there is nothing to do for primary threads which were
|
|
* started from Open Firmware. For anything else, call RTAS with the
|
|
* appropriate start location.
|
|
*
|
|
* Returns:
|
|
* 0 - failure
|
|
* 1 - success
|
|
*/
|
|
static inline int __devinit smp_startup_cpu(unsigned int lcpu)
|
|
{
|
|
int status;
|
|
unsigned long start_here = __pa((u32)*((unsigned long *)
|
|
generic_secondary_smp_init));
|
|
unsigned int pcpu;
|
|
int start_cpu;
|
|
|
|
if (cpumask_test_cpu(lcpu, of_spin_mask))
|
|
/* Already started by OF and sitting in spin loop */
|
|
return 1;
|
|
|
|
pcpu = get_hard_smp_processor_id(lcpu);
|
|
|
|
/* Check to see if the CPU out of FW already for kexec */
|
|
if (smp_query_cpu_stopped(pcpu) == QCSS_NOT_STOPPED){
|
|
cpumask_set_cpu(lcpu, of_spin_mask);
|
|
return 1;
|
|
}
|
|
|
|
/* Fixup atomic count: it exited inside IRQ handler. */
|
|
task_thread_info(paca[lcpu].__current)->preempt_count = 0;
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
|
|
goto out;
|
|
#endif
|
|
/*
|
|
* If the RTAS start-cpu token does not exist then presume the
|
|
* cpu is already spinning.
|
|
*/
|
|
start_cpu = rtas_token("start-cpu");
|
|
if (start_cpu == RTAS_UNKNOWN_SERVICE)
|
|
return 1;
|
|
|
|
status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, pcpu);
|
|
if (status != 0) {
|
|
printk(KERN_ERR "start-cpu failed: %i\n", status);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
out:
|
|
#endif
|
|
return 1;
|
|
}
|
|
|
|
/*
 * setup_cpu hook of pSeries_xics_smp_ops.
 *
 * Calls xics_setup_cpu() for every cpu except the boot cpu, initialises
 * the VPA when firmware has FW_FEATURE_SPLPAR, and removes @cpu from
 * of_spin_mask.  With CONFIG_HOTPLUG_CPU the cpu is also marked
 * CPU_STATE_ONLINE and its default offline state is set.
 */
static void __devinit smp_xics_setup_cpu(int cpu)
{
	const int is_boot_cpu = (cpu == boot_cpuid);

	if (!is_boot_cpu)
		xics_setup_cpu();

	if (firmware_has_feature(FW_FEATURE_SPLPAR))
		vpa_init(cpu);

	/* This cpu is no longer sitting in the hold loop. */
	cpumask_clear_cpu(cpu, of_spin_mask);

#ifdef CONFIG_HOTPLUG_CPU
	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
	set_default_offline_state(cpu);
#endif
}
|
|
|
|
/*
 * kick_cpu hook shared by both pSeries smp_ops tables.
 *
 * Starts logical cpu @nr via smp_startup_cpu() and then releases it by
 * setting its paca cpu_start field.  With CONFIG_HOTPLUG_CPU, a cpu
 * whose current state is CPU_STATE_INACTIVE is additionally prodded
 * with the H_PROD hcall.
 *
 * Returns 0 on success or -ENOENT when smp_startup_cpu() fails.
 */
static int __devinit smp_pSeries_kick_cpu(int nr)
{
	BUG_ON(nr < 0 || nr >= NR_CPUS);

	if (!smp_startup_cpu(nr))
		return -ENOENT;

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero.  After we set cpu_start,
	 * the processor will continue on to secondary_start.
	 */
	paca[nr].cpu_start = 1;

#ifdef CONFIG_HOTPLUG_CPU
	set_preferred_offline_state(nr, CPU_STATE_ONLINE);

	if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
		unsigned long hwid = get_hard_smp_processor_id(nr);
		long ret = plpar_hcall_norets(H_PROD, hwid);

		/* A failed prod is only reported; the kick still returns 0. */
		if (ret != H_SUCCESS)
			printk(KERN_ERR "Error: Prod to wake up processor %d "
					"Ret= %ld\n", nr, ret);
	}
#endif

	return 0;
}
|
|
|
|
static int smp_pSeries_cpu_bootable(unsigned int nr)
|
|
{
|
|
/* Special case - we inhibit secondary thread startup
|
|
* during boot if the user requests it.
|
|
*/
|
|
if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
|
|
if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
|
|
return 0;
|
|
if (smt_enabled_at_boot
|
|
&& cpu_thread_in_core(nr) >= smt_enabled_at_boot)
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static struct smp_ops_t pSeries_mpic_smp_ops = {
|
|
.message_pass = smp_mpic_message_pass,
|
|
.probe = smp_mpic_probe,
|
|
.kick_cpu = smp_pSeries_kick_cpu,
|
|
.setup_cpu = smp_mpic_setup_cpu,
|
|
};
|
|
|
|
static struct smp_ops_t pSeries_xics_smp_ops = {
|
|
.message_pass = smp_muxed_ipi_message_pass,
|
|
.cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
|
|
.probe = xics_smp_probe,
|
|
.kick_cpu = smp_pSeries_kick_cpu,
|
|
.setup_cpu = smp_xics_setup_cpu,
|
|
.cpu_bootable = smp_pSeries_cpu_bootable,
|
|
};
|
|
|
|
/* This is called very early */
|
|
static void __init smp_init_pseries(void)
|
|
{
|
|
int i;
|
|
|
|
pr_debug(" -> smp_init_pSeries()\n");
|
|
|
|
alloc_bootmem_cpumask_var(&of_spin_mask);
|
|
|
|
/* Mark threads which are still spinning in hold loops. */
|
|
if (cpu_has_feature(CPU_FTR_SMT)) {
|
|
for_each_present_cpu(i) {
|
|
if (cpu_thread_in_core(i) == 0)
|
|
cpumask_set_cpu(i, of_spin_mask);
|
|
}
|
|
} else {
|
|
cpumask_copy(of_spin_mask, cpu_present_mask);
|
|
}
|
|
|
|
cpumask_clear_cpu(boot_cpuid, of_spin_mask);
|
|
|
|
/* Non-lpar has additional take/give timebase */
|
|
if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
|
|
smp_ops->give_timebase = rtas_give_timebase;
|
|
smp_ops->take_timebase = rtas_take_timebase;
|
|
}
|
|
|
|
pr_debug(" <- smp_init_pSeries()\n");
|
|
}
|
|
|
|
/*
 * Select the MPIC smp_ops table, then run the common pSeries SMP init.
 * smp_ops must be set first because smp_init_pseries() may write the
 * timebase hooks through it.
 */
void __init smp_init_pseries_mpic(void)
{
	smp_ops = &pSeries_mpic_smp_ops;
	smp_init_pseries();
}
|
|
|
|
/*
 * Select the XICS smp_ops table, then run the common pSeries SMP init.
 * smp_ops must be set first because smp_init_pseries() may write the
 * timebase hooks through it.
 */
void __init smp_init_pseries_xics(void)
{
	smp_ops = &pSeries_xics_smp_ops;
	smp_init_pseries();
}
|