2006-12-05 17:52:36 +11:00
/*
* pseries CPU Hotplug infrastructure .
*
2006-12-05 17:52:38 +11:00
* Split out from arch / powerpc / platforms / pseries / setup . c
* arch / powerpc / kernel / rtas . c , and arch / powerpc / platforms / pseries / smp . c
2006-12-05 17:52:36 +11:00
*
* Peter Bergner , IBM March 2001.
* Copyright ( C ) 2001 IBM .
2006-12-05 17:52:38 +11:00
* Dave Engebretsen , Peter Bergner , and
* Mike Corrigan { engebret | bergner | mikec } @ us . ibm . com
* Plus various changes from other IBM teams . . .
2006-12-05 17:52:36 +11:00
*
* Copyright ( C ) 2006 Michael Ellerman , IBM Corporation
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
2015-12-16 14:52:39 -06:00
# define pr_fmt(fmt) "pseries-hotplug-cpu: " fmt
2006-12-05 17:52:36 +11:00
# include <linux/kernel.h>
2011-04-04 13:46:58 +10:00
# include <linux/interrupt.h>
2006-12-05 17:52:36 +11:00
# include <linux/delay.h>
2011-05-27 14:25:11 -04:00
# include <linux/sched.h> /* for idle_task_exit */
2017-02-08 18:51:36 +01:00
# include <linux/sched/hotplug.h>
2006-12-05 17:52:36 +11:00
# include <linux/cpu.h>
2012-10-02 16:57:57 +00:00
# include <linux/of.h>
2015-12-16 14:54:05 -06:00
# include <linux/slab.h>
2006-12-05 17:52:36 +11:00
# include <asm/prom.h>
# include <asm/rtas.h>
# include <asm/firmware.h>
# include <asm/machdep.h>
# include <asm/vdso_datapage.h>
2011-04-04 13:46:58 +10:00
# include <asm/xics.h>
powerpc/xive: guest exploitation of the XIVE interrupt controller
This is the framework for using XIVE in a PowerVM guest. The support
is very similar to the native one in a much simpler form.
Each source is associated with an Event State Buffer (ESB). This is a
two bit state machine which is used to trigger events. The bits are
named "P" (pending) and "Q" (queued) and can be controlled by MMIO.
The Guest OS registers event (or notifications) queues on which the HW
will post event data for a target to notify.
Instead of OPAL calls, a set of hypervisor calls is used to configure
the interrupt sources and the event/notification queues of the guest:
- H_INT_GET_SOURCE_INFO
used to obtain the address of the MMIO page of the Event State
Buffer (PQ bits) entry associated with the source.
- H_INT_SET_SOURCE_CONFIG
assigns a source to a "target".
- H_INT_GET_SOURCE_CONFIG
determines which "target" and "priority" are assigned to a source
- H_INT_GET_QUEUE_INFO
returns the address of the notification management page associated
with the specified "target" and "priority".
- H_INT_SET_QUEUE_CONFIG
sets or resets the event queue for a given "target" and "priority".
It is also used to set the notification config associated with the
queue, only unconditional notification for the moment. Reset is
performed with a queue size of 0 and queueing is disabled in that
case.
- H_INT_GET_QUEUE_CONFIG
returns the queue settings for a given "target" and "priority".
- H_INT_RESET
resets all of the partition's interrupt exploitation structures to
their initial state, losing all configuration set via the hcalls
H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
- H_INT_SYNC
issue a synchronisation on a source to make sure all
notifications have reached their queue.
As for XICS, the XIVE interface for the guest is described in the
device tree under the "interrupt-controller" node. A couple of new
properties are specific to XIVE :
- "reg"
contains the base address and size of the thread interrupt
management areas (TIMA), also called rings, for the User level and
for the Guest OS level. Only the Guest OS level is taken into
account today.
- "ibm,xive-eq-sizes"
the size of the event queues. One cell per size supported, contains
log2 of size, in ascending order.
- "ibm,xive-lisn-ranges"
the interrupt numbers ranges assigned to the guest. These are
allocated using a simple bitmap.
and also :
- "/ibm,plat-res-int-priorities"
contains a list of priorities that the hypervisor has reserved for
its own use.
Tested with a QEMU XIVE model for pseries and with the Power hypervisor.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-08-30 21:46:11 +02:00
# include <asm/xive.h>
2013-08-22 15:23:52 +05:30
# include <asm/plpar_wrappers.h>
2015-12-16 14:50:21 -06:00
# include "pseries.h"
2009-10-29 19:22:53 +00:00
# include "offline_states.h"
2006-12-05 17:52:36 +11:00
/* This version can't take the spinlock, because it never returns */
2014-02-20 21:13:52 +11:00
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE ;
2006-12-05 17:52:36 +11:00
2009-10-29 19:22:53 +00:00
static DEFINE_PER_CPU ( enum cpu_state_vals , preferred_offline_state ) =
CPU_STATE_OFFLINE ;
static DEFINE_PER_CPU ( enum cpu_state_vals , current_state ) = CPU_STATE_OFFLINE ;
static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE ;
2016-03-17 14:23:00 -07:00
static bool cede_offline_enabled __read_mostly = true ;
2009-10-29 19:22:53 +00:00
/*
 * Enable/disable cede_offline when available.
 *
 * Parses the boolean value that follows the "cede_offline=" boot parameter
 * into cede_offline_enabled.
 *
 * Returns 1 when kstrtobool() accepted the value, 0 when it rejected it.
 */
static int __init setup_cede_offline(char *str)
{
	return kstrtobool(str, &cede_offline_enabled) == 0;
}
/* The key must carry no padding, otherwise the parameter never matches. */
__setup("cede_offline=", setup_cede_offline);
enum cpu_state_vals get_cpu_current_state ( int cpu )
{
return per_cpu ( current_state , cpu ) ;
}
void set_cpu_current_state ( int cpu , enum cpu_state_vals state )
{
per_cpu ( current_state , cpu ) = state ;
}
enum cpu_state_vals get_preferred_offline_state ( int cpu )
{
return per_cpu ( preferred_offline_state , cpu ) ;
}
void set_preferred_offline_state ( int cpu , enum cpu_state_vals state )
{
per_cpu ( preferred_offline_state , cpu ) = state ;
}
void set_default_offline_state ( int cpu )
{
per_cpu ( preferred_offline_state , cpu ) = default_offline_state ;
}
2006-12-05 17:52:37 +11:00
static void rtas_stop_self ( void )
2006-12-05 17:52:36 +11:00
{
2015-11-24 22:26:10 +11:00
static struct rtas_args args ;
2014-04-28 08:29:51 +08:00
2006-12-05 17:52:36 +11:00
local_irq_disable ( ) ;
2014-02-20 21:13:52 +11:00
BUG_ON ( rtas_stop_self_token = = RTAS_UNKNOWN_SERVICE ) ;
2006-12-05 17:52:36 +11:00
printk ( " cpu %u (hwid %u) Ready to die... \n " ,
smp_processor_id ( ) , hard_smp_processor_id ( ) ) ;
2015-11-24 22:26:10 +11:00
rtas_call_unlocked ( & args , rtas_stop_self_token , 0 , 1 , NULL ) ;
2006-12-05 17:52:36 +11:00
panic ( " Alas, I survived. \n " ) ;
}
2006-12-05 17:52:39 +11:00
static void pseries_mach_cpu_die ( void )
2006-12-05 17:52:37 +11:00
{
2009-10-29 19:22:53 +00:00
unsigned int cpu = smp_processor_id ( ) ;
unsigned int hwcpu = hard_smp_processor_id ( ) ;
u8 cede_latency_hint = 0 ;
2006-12-05 17:52:37 +11:00
local_irq_disable ( ) ;
idle_task_exit ( ) ;
powerpc/xive: guest exploitation of the XIVE interrupt controller
This is the framework for using XIVE in a PowerVM guest. The support
is very similar to the native one in a much simpler form.
Each source is associated with an Event State Buffer (ESB). This is a
two bit state machine which is used to trigger events. The bits are
named "P" (pending) and "Q" (queued) and can be controlled by MMIO.
The Guest OS registers event (or notifications) queues on which the HW
will post event data for a target to notify.
Instead of OPAL calls, a set of Hypervisors call are used to configure
the interrupt sources and the event/notification queues of the guest:
- H_INT_GET_SOURCE_INFO
used to obtain the address of the MMIO page of the Event State
Buffer (PQ bits) entry associated with the source.
- H_INT_SET_SOURCE_CONFIG
assigns a source to a "target".
- H_INT_GET_SOURCE_CONFIG
determines to which "target" and "priority" is assigned to a source
- H_INT_GET_QUEUE_INFO
returns the address of the notification management page associated
with the specified "target" and "priority".
- H_INT_SET_QUEUE_CONFIG
sets or resets the event queue for a given "target" and "priority".
It is also used to set the notification config associated with the
queue, only unconditional notification for the moment. Reset is
performed with a queue size of 0 and queueing is disabled in that
case.
- H_INT_GET_QUEUE_CONFIG
returns the queue settings for a given "target" and "priority".
- H_INT_RESET
resets all of the partition's interrupt exploitation structures to
their initial state, losing all configuration set via the hcalls
H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
- H_INT_SYNC
issue a synchronisation on a source to make sure sure all
notifications have reached their queue.
As for XICS, the XIVE interface for the guest is described in the
device tree under the "interrupt-controller" node. A couple of new
properties are specific to XIVE :
- "reg"
contains the base address and size of the thread interrupt
managnement areas (TIMA), also called rings, for the User level and
for the Guest OS level. Only the Guest OS level is taken into
account today.
- "ibm,xive-eq-sizes"
the size of the event queues. One cell per size supported, contains
log2 of size, in ascending order.
- "ibm,xive-lisn-ranges"
the interrupt numbers ranges assigned to the guest. These are
allocated using a simple bitmap.
and also :
- "/ibm,plat-res-int-priorities"
contains a list of priorities that the hypervisor has reserved for
its own use.
Tested with a QEMU XIVE model for pseries and with the Power hypervisor.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-08-30 21:46:11 +02:00
if ( xive_enabled ( ) )
xive_teardown_cpu ( ) ;
else
xics_teardown_cpu ( ) ;
2009-10-29 19:22:53 +00:00
if ( get_preferred_offline_state ( cpu ) = = CPU_STATE_INACTIVE ) {
set_cpu_current_state ( cpu , CPU_STATE_INACTIVE ) ;
2010-07-07 12:31:02 +00:00
if ( ppc_md . suspend_disable_cpu )
ppc_md . suspend_disable_cpu ( ) ;
2009-10-29 19:22:53 +00:00
cede_latency_hint = 2 ;
get_lppaca ( ) - > idle = 1 ;
2013-08-07 02:01:26 +10:00
if ( ! lppaca_shared_proc ( get_lppaca ( ) ) )
2009-10-29 19:22:53 +00:00
get_lppaca ( ) - > donate_dedicated_cpu = 1 ;
while ( get_preferred_offline_state ( cpu ) = = CPU_STATE_INACTIVE ) {
2012-10-17 21:30:13 +00:00
while ( ! prep_irq_for_idle ( ) ) {
local_irq_enable ( ) ;
local_irq_disable ( ) ;
}
2009-10-29 19:22:53 +00:00
extended_cede_processor ( cede_latency_hint ) ;
}
2012-10-17 21:30:13 +00:00
local_irq_disable ( ) ;
2013-08-07 02:01:26 +10:00
if ( ! lppaca_shared_proc ( get_lppaca ( ) ) )
2009-10-29 19:22:53 +00:00
get_lppaca ( ) - > donate_dedicated_cpu = 0 ;
get_lppaca ( ) - > idle = 0 ;
2010-03-01 02:58:16 +00:00
if ( get_preferred_offline_state ( cpu ) = = CPU_STATE_ONLINE ) {
2011-07-25 01:46:34 +00:00
unregister_slb_shadow ( hwcpu ) ;
2009-10-29 19:22:53 +00:00
2012-10-17 21:30:13 +00:00
hard_irq_disable ( ) ;
2010-03-01 02:58:16 +00:00
/*
* Call to start_secondary_resume ( ) will not return .
* Kernel stack will be reset and start_secondary ( )
* will be called to continue the online operation .
*/
start_secondary_resume ( ) ;
}
}
2009-10-29 19:22:53 +00:00
2010-03-01 02:58:16 +00:00
/* Requested state is CPU_STATE_OFFLINE at this point */
WARN_ON ( get_preferred_offline_state ( cpu ) ! = CPU_STATE_OFFLINE ) ;
2009-10-29 19:22:53 +00:00
2010-03-01 02:58:16 +00:00
set_cpu_current_state ( cpu , CPU_STATE_OFFLINE ) ;
2011-07-25 01:46:34 +00:00
unregister_slb_shadow ( hwcpu ) ;
2010-03-01 02:58:16 +00:00
rtas_stop_self ( ) ;
2009-10-29 19:22:53 +00:00
2006-12-05 17:52:37 +11:00
/* Should never get here... */
BUG ( ) ;
for ( ; ; ) ;
}
2006-12-05 17:52:39 +11:00
static int pseries_cpu_disable ( void )
2006-12-05 17:52:38 +11:00
{
int cpu = smp_processor_id ( ) ;
2009-09-24 09:34:48 -06:00
set_cpu_online ( cpu , false ) ;
2006-12-05 17:52:38 +11:00
vdso_data - > processorCount - - ;
/*fix boot_cpuid here*/
if ( cpu = = boot_cpuid )
2010-04-26 15:32:42 +00:00
boot_cpuid = cpumask_any ( cpu_online_mask ) ;
2006-12-05 17:52:38 +11:00
/* FIXME: abstract this to not be platform specific later on */
powerpc/xive: guest exploitation of the XIVE interrupt controller
This is the framework for using XIVE in a PowerVM guest. The support
is very similar to the native one in a much simpler form.
Each source is associated with an Event State Buffer (ESB). This is a
two bit state machine which is used to trigger events. The bits are
named "P" (pending) and "Q" (queued) and can be controlled by MMIO.
The Guest OS registers event (or notifications) queues on which the HW
will post event data for a target to notify.
Instead of OPAL calls, a set of Hypervisors call are used to configure
the interrupt sources and the event/notification queues of the guest:
- H_INT_GET_SOURCE_INFO
used to obtain the address of the MMIO page of the Event State
Buffer (PQ bits) entry associated with the source.
- H_INT_SET_SOURCE_CONFIG
assigns a source to a "target".
- H_INT_GET_SOURCE_CONFIG
determines to which "target" and "priority" is assigned to a source
- H_INT_GET_QUEUE_INFO
returns the address of the notification management page associated
with the specified "target" and "priority".
- H_INT_SET_QUEUE_CONFIG
sets or resets the event queue for a given "target" and "priority".
It is also used to set the notification config associated with the
queue, only unconditional notification for the moment. Reset is
performed with a queue size of 0 and queueing is disabled in that
case.
- H_INT_GET_QUEUE_CONFIG
returns the queue settings for a given "target" and "priority".
- H_INT_RESET
resets all of the partition's interrupt exploitation structures to
their initial state, losing all configuration set via the hcalls
H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
- H_INT_SYNC
issue a synchronisation on a source to make sure sure all
notifications have reached their queue.
As for XICS, the XIVE interface for the guest is described in the
device tree under the "interrupt-controller" node. A couple of new
properties are specific to XIVE :
- "reg"
contains the base address and size of the thread interrupt
managnement areas (TIMA), also called rings, for the User level and
for the Guest OS level. Only the Guest OS level is taken into
account today.
- "ibm,xive-eq-sizes"
the size of the event queues. One cell per size supported, contains
log2 of size, in ascending order.
- "ibm,xive-lisn-ranges"
the interrupt numbers ranges assigned to the guest. These are
allocated using a simple bitmap.
and also :
- "/ibm,plat-res-int-priorities"
contains a list of priorities that the hypervisor has reserved for
its own use.
Tested with a QEMU XIVE model for pseries and with the Power hypervisor.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-08-30 21:46:11 +02:00
if ( xive_enabled ( ) )
xive_smp_disable_cpu ( ) ;
else
xics_migrate_irqs_away ( ) ;
2006-12-05 17:52:38 +11:00
return 0 ;
}
2009-10-29 19:22:53 +00:00
/*
* pseries_cpu_die : Wait for the cpu to die .
* @ cpu : logical processor id of the CPU whose death we ' re awaiting .
*
* This function is called from the context of the thread which is performing
* the cpu - offline . Here we wait for long enough to allow the cpu in question
* to self - destroy so that the cpu - offline thread can send the CPU_DEAD
* notifications .
*
* OTOH , pseries_mach_cpu_die ( ) is called by the @ cpu when it wants to
* self - destruct .
*/
2006-12-05 17:52:39 +11:00
static void pseries_cpu_die ( unsigned int cpu )
2006-12-05 17:52:38 +11:00
{
int tries ;
2009-10-29 19:22:53 +00:00
int cpu_status = 1 ;
2006-12-05 17:52:38 +11:00
unsigned int pcpu = get_hard_smp_processor_id ( cpu ) ;
2009-10-29 19:22:53 +00:00
if ( get_preferred_offline_state ( cpu ) = = CPU_STATE_INACTIVE ) {
cpu_status = 1 ;
2010-07-31 15:04:15 +10:00
for ( tries = 0 ; tries < 5000 ; tries + + ) {
2009-10-29 19:22:53 +00:00
if ( get_cpu_current_state ( cpu ) = = CPU_STATE_INACTIVE ) {
cpu_status = 0 ;
break ;
}
2010-07-31 15:04:15 +10:00
msleep ( 1 ) ;
2009-10-29 19:22:53 +00:00
}
} else if ( get_preferred_offline_state ( cpu ) = = CPU_STATE_OFFLINE ) {
for ( tries = 0 ; tries < 25 ; tries + + ) {
2010-04-28 13:39:41 +00:00
cpu_status = smp_query_cpu_stopped ( pcpu ) ;
if ( cpu_status = = QCSS_STOPPED | |
cpu_status = = QCSS_HARDWARE_ERROR )
2009-10-29 19:22:53 +00:00
break ;
cpu_relax ( ) ;
}
2006-12-05 17:52:38 +11:00
}
2009-10-29 19:22:53 +00:00
2006-12-05 17:52:38 +11:00
if ( cpu_status ! = 0 ) {
printk ( " Querying DEAD? cpu %i (%i) shows %i \n " ,
cpu , pcpu , cpu_status ) ;
}
2011-03-30 22:57:33 -03:00
/* Isolation and deallocation are definitely done by
2006-12-05 17:52:38 +11:00
* drslot_chrp_cpu . If they were not they would be
* done here . Change isolate state to Isolate and
* change allocation - state to Unusable .
*/
paca [ cpu ] . cpu_start = 0 ;
}
/*
2010-04-26 15:32:44 +00:00
* Update cpu_present_mask and paca ( s ) for a new cpu node . The wrinkle
2006-12-05 17:52:38 +11:00
* here is that a cpu device node may represent up to two logical cpus
* in the SMT case . We must honor the assumption in other code that
* the logical ids for sibling SMT threads x and y are adjacent , such
* that x ^ 1 = = y and y ^ 1 = = x .
*/
2006-12-05 17:52:39 +11:00
static int pseries_add_processor ( struct device_node * np )
2006-12-05 17:52:38 +11:00
{
unsigned int cpu ;
2010-04-26 15:32:42 +00:00
cpumask_var_t candidate_mask , tmp ;
2006-12-05 17:52:38 +11:00
int err = - ENOSPC , len , nthreads , i ;
2014-09-16 15:15:45 -05:00
const __be32 * intserv ;
2006-12-05 17:52:38 +11:00
2007-04-03 22:26:41 +10:00
intserv = of_get_property ( np , " ibm,ppc-interrupt-server#s " , & len ) ;
2006-12-05 17:52:38 +11:00
if ( ! intserv )
return 0 ;
2010-04-26 15:32:42 +00:00
zalloc_cpumask_var ( & candidate_mask , GFP_KERNEL ) ;
zalloc_cpumask_var ( & tmp , GFP_KERNEL ) ;
2006-12-05 17:52:38 +11:00
nthreads = len / sizeof ( u32 ) ;
for ( i = 0 ; i < nthreads ; i + + )
2010-04-26 15:32:42 +00:00
cpumask_set_cpu ( i , tmp ) ;
2006-12-05 17:52:38 +11:00
2008-01-25 21:08:02 +01:00
cpu_maps_update_begin ( ) ;
2006-12-05 17:52:38 +11:00
2010-04-26 15:32:42 +00:00
BUG_ON ( ! cpumask_subset ( cpu_present_mask , cpu_possible_mask ) ) ;
2006-12-05 17:52:38 +11:00
/* Get a bitmap of unoccupied slots. */
2010-04-26 15:32:42 +00:00
cpumask_xor ( candidate_mask , cpu_possible_mask , cpu_present_mask ) ;
if ( cpumask_empty ( candidate_mask ) ) {
2006-12-05 17:52:38 +11:00
/* If we get here, it most likely means that NR_CPUS is
* less than the partition ' s max processors setting .
*/
2017-08-21 10:16:47 -05:00
printk ( KERN_ERR " Cannot add cpu %pOF; this system configuration "
" supports %d logical cpus. \n " , np ,
2015-01-21 16:21:14 -06:00
num_possible_cpus ( ) ) ;
2006-12-05 17:52:38 +11:00
goto out_unlock ;
}
2010-04-26 15:32:42 +00:00
while ( ! cpumask_empty ( tmp ) )
if ( cpumask_subset ( tmp , candidate_mask ) )
2006-12-05 17:52:38 +11:00
/* Found a range where we can insert the new cpu(s) */
break ;
else
2010-04-26 15:32:42 +00:00
cpumask_shift_left ( tmp , tmp , nthreads ) ;
2006-12-05 17:52:38 +11:00
2010-04-26 15:32:42 +00:00
if ( cpumask_empty ( tmp ) ) {
2010-04-26 15:32:44 +00:00
printk ( KERN_ERR " Unable to find space in cpu_present_mask for "
2006-12-05 17:52:38 +11:00
" processor %s with %d thread(s) \n " , np - > name ,
nthreads ) ;
goto out_unlock ;
}
2010-04-26 15:32:42 +00:00
for_each_cpu ( cpu , tmp ) {
2011-04-28 05:07:23 +00:00
BUG_ON ( cpu_present ( cpu ) ) ;
2009-09-24 09:34:48 -06:00
set_cpu_present ( cpu , true ) ;
2014-09-16 15:15:45 -05:00
set_hard_smp_processor_id ( cpu , be32_to_cpu ( * intserv + + ) ) ;
2006-12-05 17:52:38 +11:00
}
err = 0 ;
out_unlock :
2008-01-25 21:08:02 +01:00
cpu_maps_update_done ( ) ;
2010-04-26 15:32:42 +00:00
free_cpumask_var ( candidate_mask ) ;
free_cpumask_var ( tmp ) ;
2006-12-05 17:52:38 +11:00
return err ;
}
/*
* Update the present map for a cpu node which is going away , and set
* the hard id in the paca ( s ) to - 1 to be consistent with boot time
* convention for non - present cpus .
*/
2006-12-05 17:52:39 +11:00
static void pseries_remove_processor ( struct device_node * np )
2006-12-05 17:52:38 +11:00
{
unsigned int cpu ;
int len , nthreads , i ;
2014-09-12 14:11:42 -05:00
const __be32 * intserv ;
u32 thread ;
2006-12-05 17:52:38 +11:00
2007-04-03 22:26:41 +10:00
intserv = of_get_property ( np , " ibm,ppc-interrupt-server#s " , & len ) ;
2006-12-05 17:52:38 +11:00
if ( ! intserv )
return ;
nthreads = len / sizeof ( u32 ) ;
2008-01-25 21:08:02 +01:00
cpu_maps_update_begin ( ) ;
2006-12-05 17:52:38 +11:00
for ( i = 0 ; i < nthreads ; i + + ) {
2014-09-12 14:11:42 -05:00
thread = be32_to_cpu ( intserv [ i ] ) ;
2006-12-05 17:52:38 +11:00
for_each_present_cpu ( cpu ) {
2014-09-12 14:11:42 -05:00
if ( get_hard_smp_processor_id ( cpu ) ! = thread )
2006-12-05 17:52:38 +11:00
continue ;
BUG_ON ( cpu_online ( cpu ) ) ;
2009-09-24 09:34:48 -06:00
set_cpu_present ( cpu , false ) ;
2006-12-05 17:52:38 +11:00
set_hard_smp_processor_id ( cpu , - 1 ) ;
break ;
}
2010-04-26 15:32:42 +00:00
if ( cpu > = nr_cpu_ids )
2006-12-05 17:52:38 +11:00
printk ( KERN_WARNING " Could not find cpu to remove "
2014-09-12 14:11:42 -05:00
" with physical id 0x%x \n " , thread ) ;
2006-12-05 17:52:38 +11:00
}
2008-01-25 21:08:02 +01:00
cpu_maps_update_done ( ) ;
2006-12-05 17:52:38 +11:00
}
2017-11-28 16:58:43 -06:00
extern int find_and_online_cpu_nid ( int cpu ) ;
2015-12-16 14:50:21 -06:00
static int dlpar_online_cpu ( struct device_node * dn )
{
int rc = 0 ;
unsigned int cpu ;
int len , nthreads , i ;
const __be32 * intserv ;
u32 thread ;
intserv = of_get_property ( dn , " ibm,ppc-interrupt-server#s " , & len ) ;
if ( ! intserv )
return - EINVAL ;
nthreads = len / sizeof ( u32 ) ;
cpu_maps_update_begin ( ) ;
for ( i = 0 ; i < nthreads ; i + + ) {
thread = be32_to_cpu ( intserv [ i ] ) ;
for_each_present_cpu ( cpu ) {
if ( get_hard_smp_processor_id ( cpu ) ! = thread )
continue ;
BUG_ON ( get_cpu_current_state ( cpu )
! = CPU_STATE_OFFLINE ) ;
cpu_maps_update_done ( ) ;
2017-09-08 15:47:47 -05:00
timed_topology_update ( 1 ) ;
2017-11-28 16:58:43 -06:00
find_and_online_cpu_nid ( cpu ) ;
2015-12-16 14:50:21 -06:00
rc = device_online ( get_cpu_device ( cpu ) ) ;
if ( rc )
goto out ;
cpu_maps_update_begin ( ) ;
break ;
}
if ( cpu = = num_possible_cpus ( ) )
printk ( KERN_WARNING " Could not find cpu to online "
" with physical id 0x%x \n " , thread ) ;
}
cpu_maps_update_done ( ) ;
out :
return rc ;
}
/*
 * Report whether a child of @parent already carries @drc_index in its
 * "ibm,my-drc-index" property. Children without that property are skipped.
 */
static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;

	for_each_child_of_node(parent, child) {
		if (of_property_read_u32(child, "ibm,my-drc-index",
					 &my_drc_index))
			continue;

		if (my_drc_index == drc_index) {
			/* Leaving the iterator early: drop its reference. */
			of_node_put(child);
			return true;
		}
	}

	return false;
}
2015-12-16 14:55:07 -06:00
/*
 * Report whether @drc_index is listed in the "ibm,drc-indexes" property of
 * @parent.
 *
 * The property holds the number of entries followed by the drc values, so
 * the scan starts at index 1. Starting at 0 would compare the count itself
 * against @drc_index.
 */
static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	u32 drc;
	int index;

	for (index = 1; ; index++) {
		/* A read failure marks the end of the array. */
		if (of_property_read_u32_index(parent, "ibm,drc-indexes",
					       index, &drc))
			return false;

		if (drc == drc_index)
			return true;
	}
}
2015-12-16 14:51:26 -06:00
/*
 * Hot-add the CPU identified by @drc_index: acquire its DRC, build its
 * device node with configure-connector, attach the node under "/cpus" and
 * online its threads. Each failing step undoes the steps before it.
 *
 * Returns 0 on success, -ENODEV when "/cpus" is missing, -EINVAL when the
 * CPU already exists, the index is unknown, or acquiring/configuring fails,
 * and otherwise the error from attaching or onlining.
 */
static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug(" Attempting to add CPU, drc index: %x \n ", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn(" Failed to find CPU root node \" /cpus \" \n ");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		pr_warn(" CPU with drc index %x already exists \n ", drc_index);
		rc = -EINVAL;
		goto out_put_parent;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		pr_warn(" Cannot find CPU (drc index %x) to add. \n ", drc_index);
		rc = -EINVAL;
		goto out_put_parent;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn(" Failed to acquire DRC, rc: %d, drc index: %x \n ",
			rc, drc_index);
		rc = -EINVAL;
		goto out_put_parent;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		pr_warn(" Failed call to configure-connector, drc index: %x \n ",
			drc_index);
		dlpar_release_drc(drc_index);
		rc = -EINVAL;
		goto out_put_parent;
	}

	rc = dlpar_attach_node(dn, parent);

	/* Regardless we are done with parent now */
	of_node_put(parent);

	if (rc) {
		saved_rc = rc;
		pr_warn(" Failed to attach node %s, rc: %d, drc index: %x \n ",
			dn->name, rc, drc_index);

		/* Only free the new nodes once the DRC has been given back. */
		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn(" Failed to online cpu %s, rc: %d, drc index: %x \n ",
			dn->name, rc, drc_index);

		/* Only release the DRC once the node has been detached. */
		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug(" Successfully added CPU %s, drc index: %x \n ", dn->name,
		 drc_index);
	return rc;

out_put_parent:
	of_node_put(parent);
	return rc;
}
/*
 * Take offline every thread listed in the "ibm,ppc-interrupt-server#s"
 * property of @dn. Threads already in CPU_STATE_OFFLINE are left alone;
 * threads in CPU_STATE_INACTIVE are prodded so they move on to
 * CPU_STATE_OFFLINE.
 *
 * Returns 0 on success, -EINVAL when the property is missing, or the error
 * from device_offline().
 */
static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
				break;

			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
				set_preferred_offline_state(cpu,
							    CPU_STATE_OFFLINE);
				/* Released across device_offline(). */
				cpu_maps_update_done();
				timed_topology_update(1);
				rc = device_offline(get_cpu_device(cpu));
				if (rc)
					goto out;	/* lock is not held here */
				cpu_maps_update_begin();
				break;
			}

			/*
			 * The cpu is in CPU_STATE_INACTIVE.
			 * Upgrade its state to CPU_STATE_OFFLINE.
			 */
			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
			BUG_ON(plpar_hcall_norets(H_PROD, thread)
								!= H_SUCCESS);
			__cpu_die(cpu);
			break;
		}
		/*
		 * The iterator finishes with cpu >= nr_cpu_ids when nothing
		 * matched; this is the same test pseries_remove_processor()
		 * uses.
		 */
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING " Could not find cpu to offline with physical id 0x%x \n ", thread);
	}
	cpu_maps_update_done();

out:
	return rc;
}
2015-12-16 14:51:26 -06:00
/*
 * Hot-remove the CPU described by @dn / @drc_index: offline its threads,
 * release its DRC and detach its device node. A failing step re-onlines
 * the CPU (re-acquiring the DRC first where it was already released).
 *
 * Returns 0 on success, -EINVAL when offlining fails, otherwise the error
 * from releasing the DRC or detaching the node.
 */
static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug(" Attempting to remove CPU %s, drc index: %x \n ",
		 dn->name, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn(" Failed to offline CPU %s, rc: %d \n ", dn->name, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn(" Failed to release drc (%x) for CPU %s, rc: %d \n ",
			drc_index, dn->name, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = dlpar_detach_node(dn);
	if (rc) {
		int saved_rc = rc;

		/* Newline terminated like the other messages in this file. */
		pr_warn(" Failed to detach CPU %s, rc: %d \n ", dn->name, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug(" Successfully removed CPU, drc index: %x \n ", drc_index);
	return 0;
}
2015-12-16 14:54:05 -06:00
/*
 * Find the "cpu" device node whose "ibm,my-drc-index" equals @drc_index.
 *
 * Returns the node, or NULL when the iterator runs out without a match.
 * The loop is left early on a match and the caller in this file
 * (dlpar_cpu_remove_by_index()) calls of_node_put() on the result.
 */
static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;

	for_each_node_by_type(dn, "cpu") {
		if (of_property_read_u32(dn, "ibm,my-drc-index", &my_index))
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}
/*
 * Hot-remove the CPU whose drc index is @drc_index.
 *
 * Returns -ENODEV when no such CPU node exists, otherwise the result of
 * dlpar_cpu_remove().
 */
static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *cpu_node = cpu_drc_index_to_dn(drc_index);
	int rc;

	if (cpu_node == NULL) {
		pr_warn(" Cannot find CPU (drc index %x) to remove \n ",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(cpu_node, drc_index);
	/* Drop the node reference obtained by the lookup above. */
	of_node_put(cpu_node);

	return rc;
}
/*
 * Collect the drc indexes of up to @cpus_to_remove "cpu" nodes into
 * @cpu_drcs, which must have room for @cpus_to_remove entries.
 *
 * Returns the number of cpu nodes seen, counting at most one beyond
 * @cpus_to_remove, or -1 when a node's "ibm,my-drc-index" cannot be read.
 * Only a return value greater than @cpus_to_remove means the removal may
 * proceed without taking away the last CPU.
 */
static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
	struct device_node *dn;
	int cpus_found = 0;
	int rc;

	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
	 * remove the last CPU.
	 */
	for_each_node_by_type(dn, "cpu") {
		cpus_found++;

		if (cpus_found > cpus_to_remove) {
			/* Leaving the iterator early: drop its reference. */
			of_node_put(dn);
			break;
		}

		/* Note that cpus_found is always 1 ahead of the index
		 * into the cpu_drcs array, so we use cpus_found - 1
		 */
		rc = of_property_read_u32(dn, "ibm,my-drc-index",
					  &cpu_drcs[cpus_found - 1]);
		if (rc) {
			pr_warn(" Error occurred getting drc-index for %s \n ",
				dn->name);
			of_node_put(dn);
			return -1;
		}
	}

	if (cpus_found < cpus_to_remove) {
		pr_warn(" Failed to find enough CPUs (%d of %d) to remove \n ",
			cpus_found, cpus_to_remove);
	} else if (cpus_found == cpus_to_remove) {
		pr_warn(" Cannot remove all CPUs \n ");
	}

	return cpus_found;
}
/*
 * Hot-remove @cpus_to_remove CPUs. If any removal fails, the CPUs already
 * removed are added back.
 *
 * Returns 0 when all requested CPUs were removed, -EINVAL otherwise
 * (including allocation failure and not enough removable CPUs).
 */
static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
{
	u32 *cpu_drcs;
	int cpus_found;
	int cpus_removed = 0;
	int i, rc;

	pr_debug(" Attempting to hot-remove %d CPUs \n ", cpus_to_remove);

	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
	/*
	 * Test the sign on its own: comparing the int directly against the
	 * u32 would turn the helper's -1 error return into a huge unsigned
	 * value and let it through.
	 */
	if (cpus_found < 0 || (u32)cpus_found <= cpus_to_remove) {
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_remove; i++) {
		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
		if (rc)
			break;

		cpus_removed++;
	}

	if (cpus_removed != cpus_to_remove) {
		pr_warn(" CPU hot-remove failed, adding back removed CPUs \n ");

		for (i = 0; i < cpus_removed; i++)
			dlpar_cpu_add(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}
2015-12-16 14:55:07 -06:00
/*
 * Fill @cpu_drcs with up to @cpus_to_add drc indexes from the
 * "ibm,drc-indexes" property of "/cpus" that do not yet have a cpu node.
 *
 * @cpu_drcs stays owned by the caller on every path; this function never
 * frees it.
 *
 * Returns the number of indexes stored, or -1 when "/cpus" is missing.
 */
static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
{
	struct device_node *parent;
	int cpus_found = 0;
	int index, rc;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn(" Could not find CPU root node in device tree \n ");
		return -1;
	}

	/* Search the ibm,drc-indexes array for possible CPU drcs to
	 * add. Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
	index = 1;
	while (cpus_found < cpus_to_add) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (dlpar_cpu_exists(parent, drc))
			continue;

		cpu_drcs[cpus_found++] = drc;
	}

	of_node_put(parent);
	return cpus_found;
}

/*
 * Hot-add @cpus_to_add CPUs. If any addition fails, the CPUs already added
 * are removed again.
 *
 * Returns 0 when all requested CPUs were added, -EINVAL otherwise
 * (including allocation failure and not enough addable CPUs).
 */
static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
	u32 *cpu_drcs;
	int cpus_added = 0;
	int cpus_found;
	int i, rc;

	pr_debug(" Attempting to hot-add %d CPUs \n ", cpus_to_add);

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
	/*
	 * Test the sign on its own: comparing the int directly against the
	 * u32 would turn the helper's -1 error return into a huge unsigned
	 * value and let it through.
	 */
	if (cpus_found < 0 || (u32)cpus_found < cpus_to_add) {
		/* The helper has already logged the -1 case. */
		if (cpus_found >= 0)
			pr_warn(" Failed to find enough CPUs (%d of %d) to add \n ",
				cpus_found, cpus_to_add);
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_add; i++) {
		rc = dlpar_cpu_add(cpu_drcs[i]);
		if (rc)
			break;

		cpus_added++;
	}

	if (cpus_added < cpus_to_add) {
		pr_warn(" CPU hot-add failed, removing any added CPUs \n ");

		for (i = 0; i < cpus_added; i++)
			dlpar_cpu_remove_by_index(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}
2015-12-16 14:54:05 -06:00
int dlpar_cpu ( struct pseries_hp_errorlog * hp_elog )
{
u32 count , drc_index ;
int rc ;
count = hp_elog - > _drc_u . drc_count ;
drc_index = hp_elog - > _drc_u . drc_index ;
lock_device_hotplug ( ) ;
switch ( hp_elog - > action ) {
case PSERIES_HP_ELOG_ACTION_REMOVE :
if ( hp_elog - > id_type = = PSERIES_HP_ELOG_ID_DRC_COUNT )
rc = dlpar_cpu_remove_by_count ( count ) ;
else if ( hp_elog - > id_type = = PSERIES_HP_ELOG_ID_DRC_INDEX )
rc = dlpar_cpu_remove_by_index ( drc_index ) ;
else
rc = - EINVAL ;
break ;
2015-12-16 14:55:07 -06:00
case PSERIES_HP_ELOG_ACTION_ADD :
if ( hp_elog - > id_type = = PSERIES_HP_ELOG_ID_DRC_COUNT )
rc = dlpar_cpu_add_by_count ( count ) ;
else if ( hp_elog - > id_type = = PSERIES_HP_ELOG_ID_DRC_INDEX )
rc = dlpar_cpu_add ( drc_index ) ;
else
rc = - EINVAL ;
break ;
2015-12-16 14:54:05 -06:00
default :
pr_err ( " Invalid action (%d) specified \n " , hp_elog - > action ) ;
rc = - EINVAL ;
break ;
}
unlock_device_hotplug ( ) ;
return rc ;
}
2015-12-16 14:51:26 -06:00
# ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
/*
 * Probe handler: parse a DRC index from @buf and hot-add that CPU.
 *
 * @buf is parsed with kstrtou32() using base 0. Returns @count on success,
 * -EINVAL if @buf does not parse, or the error from dlpar_cpu_add().
 */
static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc;
	int err;

	if (kstrtou32(buf, 0, &drc))
		return -EINVAL;

	err = dlpar_cpu_add(drc);
	if (err)
		return err;

	return count;
}
2015-12-16 14:50:21 -06:00
/*
 * Release handler: @buf is a device tree path naming the CPU node to
 * remove.
 *
 * The node's "ibm,my-drc-index" property supplies the DRC index passed to
 * dlpar_cpu_remove(). The node reference taken by the lookup is dropped on
 * every path once the node has been found.
 *
 * Returns @count on success, -EINVAL if the node or property is missing,
 * or the error from dlpar_cpu_remove().
 */
static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *cpu_node;
	ssize_t ret;
	u32 drc;

	cpu_node = of_find_node_by_path(buf);
	if (!cpu_node)
		return -EINVAL;

	if (of_property_read_u32(cpu_node, " ibm,my-drc-index ", &drc)) {
		ret = -EINVAL;
	} else {
		int err = dlpar_cpu_remove(cpu_node, drc);

		ret = err ? err : count;
	}

	of_node_put(cpu_node);
	return ret;
}
# endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
2006-12-05 17:52:39 +11:00
static int pseries_smp_notifier ( struct notifier_block * nb ,
2014-11-24 17:58:01 +00:00
unsigned long action , void * data )
2006-12-05 17:52:38 +11:00
{
2014-11-24 17:58:01 +00:00
struct of_reconfig_data * rd = data ;
2011-06-21 03:35:56 +00:00
int err = 0 ;
2006-12-05 17:52:38 +11:00
switch ( action ) {
2012-10-02 16:57:57 +00:00
case OF_RECONFIG_ATTACH_NODE :
2014-11-24 17:58:01 +00:00
err = pseries_add_processor ( rd - > dn ) ;
2006-12-05 17:52:38 +11:00
break ;
2012-10-02 16:57:57 +00:00
case OF_RECONFIG_DETACH_NODE :
2014-11-24 17:58:01 +00:00
pseries_remove_processor ( rd - > dn ) ;
2006-12-05 17:52:38 +11:00
break ;
}
2011-06-21 03:35:56 +00:00
return notifier_from_errno ( err ) ;
2006-12-05 17:52:38 +11:00
}
2006-12-05 17:52:39 +11:00
/*
 * Notifier block that routes device tree reconfiguration events to
 * pseries_smp_notifier(); registered from pseries_cpu_hotplug_init().
 */
static struct notifier_block pseries_smp_nb = {
	.notifier_call	= pseries_smp_notifier,
};
2009-10-29 19:22:53 +00:00
/*
 * Sizing for the buffer handed to the "ibm,get-system-parameter" RTAS call
 * in parse_cede_parameters(): MAX_CEDE_LATENCY_LEVELS entries of
 * CEDE_LATENCY_PARAM_LENGTH bytes each. CEDE_LATENCY_TOKEN is the value
 * passed to that call to select the parameter.
 */
#define MAX_CEDE_LATENCY_LEVELS		4
#define CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

/* Buffer passed to the RTAS call by physical address. */
static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
static int parse_cede_parameters ( void )
{
memset ( cede_parameters , 0 , CEDE_LATENCY_PARAM_MAX_LENGTH ) ;
2010-02-07 13:52:05 +00:00
return rtas_call ( rtas_token ( " ibm,get-system-parameter " ) , 3 , 1 ,
NULL ,
CEDE_LATENCY_TOKEN ,
__pa ( cede_parameters ) ,
CEDE_LATENCY_PARAM_MAX_LENGTH ) ;
2009-10-29 19:22:53 +00:00
}
2006-12-05 17:52:36 +11:00
/*
 * Boot-time setup of pseries CPU hotplug.
 *
 * When CONFIG_ARCH_CPU_PROBE_RELEASE is set, the probe/release hooks are
 * installed unconditionally. The remaining hooks are installed only if both
 * the "stop-self" and "query-cpu-stopped-state" RTAS tokens are known;
 * otherwise an informational message is printed and setup stops.
 *
 * With FW_FEATURE_LPAR the device tree reconfig notifier is registered
 * and, if cede offline is enabled and parse_cede_parameters() succeeds,
 * the default offline state of every online CPU is switched to
 * CPU_STATE_INACTIVE.
 *
 * Always returns 0.
 */
static int __init pseries_cpu_hotplug_init(void)
{
	int query_stopped_token;
	int cpu;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	rtas_stop_self_token = rtas_token(" stop-self ");
	query_stopped_token = rtas_token(" query-cpu-stopped-state ");

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
	    query_stopped_token == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO " CPU Hotplug not supported by firmware "
				" - disabling. \n ");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	of_reconfig_notifier_register(&pseries_smp_nb);

	cpu_maps_update_begin();
	if (cede_offline_enabled && parse_cede_parameters() == 0) {
		default_offline_state = CPU_STATE_INACTIVE;
		for_each_online_cpu(cpu)
			set_default_offline_state(cpu);
	}
	cpu_maps_update_done();

	return 0;
}
machine_arch_initcall(pseries, pseries_cpu_hotplug_init);