/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 *
 * This does not handle HOTPLUG_CPU yet.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

static cpumask_t cpu_initialized_map;

static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);

/*
 * Structure and data for smp_call_function().  This is designed to
 * minimise static memory requirements.  It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
	void (*func)(void *info);
	void *info;
	atomic_t started;
	atomic_t finished;
	int wait;
};

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);

static struct call_data_struct *call_data;

/*
 * Reschedule call back.  Nothing to do, all the work is done
 * automatically when we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}
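
/*
 * Entry point for a freshly started secondary VCPU: cpu_initialize_context()
 * below points the vcpu's initial eip here.  Finish per-cpu setup, mark the
 * cpu online and drop into the idle loop.
 */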
static __cpuinit void cpu_bringup_and_idle(void)
{
	int cpu = smp_processor_id();

	cpu_init();
	preempt_disable();
	per_cpu(cpu_state, cpu) = CPU_ONLINE;
	xen_setup_cpu_clockevents();

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();			/* make sure everything is out */

	cpu_idle();
}
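
/*
 * There is no APIC under Xen, so the reschedule and call-function IPIs
 * arrive as events on per-cpu event channels.  Bind them to ordinary irq
 * handlers here and remember the irqs so they can be unbound on failure.
 */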
static int xen_smp_intr_init(unsigned int cpu)
{
	int rc;
	const char *resched_name, *callfunc_name;

	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
				    cpu,
				    xen_reschedule_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    resched_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(resched_irq, cpu) = rc;

	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
				    cpu,
				    xen_call_function_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(callfunc_irq, cpu) = rc;

	return 0;

 fail:
	if (per_cpu(resched_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
	if (per_cpu(callfunc_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
	return rc;
}
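
/*
 * Ask the hypervisor which VCPUs exist: VCPUOP_is_up fails with a negative
 * error for a vcpu that was never created, so any non-negative result marks
 * the cpu as possible.
 */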
void __init xen_fill_possible_map(void)
{
	int i, rc;

	for (i = 0; i < NR_CPUS; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0)
			cpu_set(i, cpu_possible_map);
	}
}

void __init xen_smp_prepare_boot_cpu(void)
{
	int cpu;

	BUG_ON(smp_processor_id() != 0);
	native_smp_prepare_boot_cpu();

	/* We've switched to the "real" per-cpu gdt, so make sure the
	   old memory can be recycled */
	make_lowmem_page_readwrite(&per_cpu__gdt_page);

	for_each_possible_cpu(cpu) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		/*
		 * cpu_core_map lives in a per cpu area that is cleared
		 * when the per cpu array is allocated.
		 *
		 * cpus_clear(per_cpu(cpu_core_map, cpu));
		 */
	}

	xen_setup_vcpu_info_placement();
}

void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned cpu;

	for_each_possible_cpu(cpu) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		/*
		 * cpu_core_map will be zeroed when the per
		 * cpu area is allocated.
		 *
		 * cpus_clear(per_cpu(cpu_core_map, cpu));
		 */
	}

	smp_store_cpu_info(0);
	set_cpu_sibling_map(0);

	if (xen_smp_intr_init(0))
		BUG();

	cpu_initialized_map = cpumask_of_cpu(0);

	/* Restrict the possible_map according to max_cpus. */
	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
		for (cpu = NR_CPUS - 1; !cpu_isset(cpu, cpu_possible_map); cpu--)
			continue;
		cpu_clear(cpu, cpu_possible_map);
	}

	for_each_possible_cpu(cpu) {
		struct task_struct *idle;

		if (cpu == 0)
			continue;

		idle = fork_idle(cpu);
		if (IS_ERR(idle))
			panic("failed fork for CPU %d", cpu);

		cpu_set(cpu, cpu_present_map);
	}

	//init_xenbus_allowed_cpumask();
}
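
/*
 * Describe the new vcpu's initial state (registers, GDT, callbacks and
 * pagetable) in a vcpu_guest_context and hand it to the hypervisor with
 * VCPUOP_initialise.  The vcpu does not actually start running until
 * VCPUOP_up is issued from xen_cpu_up().
 */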
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct gdt_page *gdt = &per_cpu(gdt_page, cpu);

	if (cpu_test_and_set(cpu, cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = 0;
	ctxt->user_regs.ss = __KERNEL_DS;
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
	make_lowmem_page_readonly(gdt->gdt);

	ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
	ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);

	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.esp0;

	ctxt->event_callback_cs = __KERNEL_CS;
	ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_cs = __KERNEL_CS;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}
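
/*
 * Bring a secondary cpu up: set up its per-cpu data, timer and
 * interrupts-blocked state, load its initial context into the hypervisor,
 * bind its IPI event channels, mark it online and finally start it with
 * VCPUOP_up.
 */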
int __cpuinit xen_cpu_up(unsigned int cpu)
{
	struct task_struct *idle = idle_task(cpu);
	int rc;

#if 0
	rc = cpu_up_check(cpu);
	if (rc)
		return rc;
#endif

	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	irq_ctx_init(cpu);
	xen_setup_timer(cpu);

	/* make sure interrupts start blocked */
	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	if (num_online_cpus() == 1)
		alternatives_smp_switch(1);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		return rc;

	smp_store_cpu_info(cpu);
	set_cpu_sibling_map(cpu);
	/* This must be done before setting cpu_online_map */
	wmb();

	cpu_set(cpu, cpu_online_map);

	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
	BUG_ON(rc);

	return 0;
}

void xen_smp_cpus_done(unsigned int max_cpus)
{
}
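
/*
 * Runs on each cpu being stopped: switch to the kernel pagetable so no
 * process pagetable stays pinned, then take this vcpu offline with
 * VCPUOP_down.
 */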
static void stop_self(void *v)
{
	int cpu = smp_processor_id();

	/* make sure we're not pinning something down */
	load_cr3(swapper_pg_dir);
	/* should set up a minimal gdt */

	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
	BUG();
}

void xen_smp_send_stop(void)
{
	smp_call_function(stop_self, NULL, 0, 0);
}

void xen_smp_send_reschedule(int cpu)
{
	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
	unsigned cpu;

	cpus_and(mask, mask, cpu_online_map);
	for_each_cpu_mask(cpu, mask)
		xen_send_IPI_one(cpu, vector);
}
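
/*
 * Receiving side of the call-function IPI: pick up func/info from call_data,
 * bump 'started' so the initiator knows the data has been read, run the
 * function and, if the initiator is waiting, bump 'finished'.
 */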
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
	void (*func)(void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(&call_data->started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	irq_enter();
	(*func)(info);
	__get_cpu_var(irq_stat).irq_call_count++;
	irq_exit();

	if (wait) {
		mb();		/* commit everything before setting finished */
		atomic_inc(&call_data->finished);
	}

	return IRQ_HANDLED;
}
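
/*
 * Initiating side of the cross-call: publish func/info through call_data,
 * IPI the target cpus and spin until they have all picked the data up (and
 * finished running the function, if wait is set).  If any target vcpu has
 * been preempted by the hypervisor, yield before spinning so it gets a
 * chance to run.
 */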
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
			       void *info, int wait)
{
	struct call_data_struct data;
	int cpus, cpu;
	bool yield;

	/* Holding any lock stops cpus from going down. */
	spin_lock(&call_lock);

	cpu_clear(smp_processor_id(), mask);

	cpus = cpus_weight(mask);
	if (!cpus) {
		spin_unlock(&call_lock);
		return 0;
	}

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	mb();			/* write everything before IPI */

	/* Send a message to other CPUs and wait for them to respond */
	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
	/* Make sure other vcpus get a chance to run if they need to. */
	yield = false;
	for_each_cpu_mask(cpu, mask)
		if (xen_vcpu_stolen(cpu))
			yield = true;

	if (yield)
		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
	/* Wait for response */
	while (atomic_read(&data.started) != cpus ||
	       (wait && atomic_read(&data.finished) != cpus))
		cpu_relax();

	spin_unlock(&call_lock);

	return 0;
}