2009-07-31 02:03:45 +04:00
/*P:800
 * Interrupts (traps) are complicated enough to earn their own file.
 * There are three classes of interrupts:
 *
 * 1) Real hardware interrupts which occur while we're running the Guest,
 * 2) Interrupts for virtual devices attached to the Guest, and
 * 3) Traps and faults from the Guest.
 *
 * Real hardware interrupts must be delivered to the Host, not the Guest.
 * Virtual interrupts must be delivered to the Guest, but we make them look
 * just like real hardware would deliver them.  Traps from the Guest can be set
 * up to go directly back into the Guest, but sometimes the Host wants to see
 * them first, so we also have a way of "reflecting" them into the Guest as if
 * they had been delivered to it directly.
:*/
2007-07-19 12:49:23 +04:00
# include <linux/uaccess.h>
2007-10-22 05:03:35 +04:00
# include <linux/interrupt.h>
# include <linux/module.h>
2009-10-07 17:09:06 +04:00
# include <linux/sched.h>
2007-07-19 12:49:23 +04:00
# include "lg.h"
2007-10-22 05:03:35 +04:00
/* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */
2015-05-11 08:17:04 +03:00
static unsigned int syscall_vector = IA32_SYSCALL_VECTOR ;
2007-10-22 05:03:35 +04:00
module_param ( syscall_vector , uint , 0444 ) ;
2007-07-26 21:41:04 +04:00
/*
 * The handler address in an IDT entry is split in two: the bottom 16 bits sit
 * in the low word ("lo") and the top 16 bits in the high word ("hi").  Glue
 * them back together.
 */
static unsigned long idt_address(u32 lo, u32 hi)
{
	const u32 low_half = lo & 0x0000FFFF;
	const u32 high_half = hi & 0xFFFF0000;

	return high_half | low_half;
}
2009-07-31 02:03:45 +04:00
/*
 * The "type" of an interrupt handler is the 4 bit field in bits 8-11 of the
 * high word.  Only a couple of types are supported by the rest of this file.
 * The low word is not consulted.
 */
static int idt_type(u32 lo, u32 hi)
{
	const u32 shifted = hi >> 8;

	return shifted & 0xF;
}
2007-07-26 21:41:04 +04:00
/*
 * An IDT entry is only usable when its "present" bit (0x8000 in the high
 * word) is set.  The low word is not consulted.
 */
static bool idt_present(u32 lo, u32 hi)
{
	return (hi & 0x8000) != 0;
}
2009-07-31 02:03:45 +04:00
/*
* We need a helper to " push " a value onto the Guest ' s stack , since that ' s a
* big part of what delivering an interrupt does .
*/
2008-01-18 00:19:42 +03:00
static void push_guest_stack ( struct lg_cpu * cpu , unsigned long * gstack , u32 val )
2007-07-19 12:49:23 +04:00
{
2007-07-26 21:41:04 +04:00
/* Stack grows upwards: move stack then write value. */
2007-07-19 12:49:23 +04:00
* gstack - = 4 ;
2008-01-18 00:19:42 +03:00
lgwrite ( cpu , * gstack , u32 , val ) ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*H:210
2015-04-01 06:02:20 +03:00
* The push_guest_interrupt_stack ( ) routine saves Guest state on the stack for
* an interrupt or trap . The mechanics of delivering traps and interrupts to
* the Guest are the same , except some traps have an " error code " which gets
* pushed onto the stack as well : the caller tells us if this is one .
2007-07-26 21:41:04 +04:00
*
* We set up the stack just like the CPU does for a real interrupt , so it ' s
* identical for the Guest ( and the standard " iret " instruction will undo
2009-07-31 02:03:45 +04:00
* it ) .
*/
2015-04-01 06:02:20 +03:00
static void push_guest_interrupt_stack ( struct lg_cpu * cpu , bool has_err )
2007-07-19 12:49:23 +04:00
{
2007-10-22 05:03:36 +04:00
unsigned long gstack , origstack ;
2007-07-19 12:49:23 +04:00
u32 eflags , ss , irq_enable ;
2007-10-22 05:03:36 +04:00
unsigned long virtstack ;
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*
* There are two cases for interrupts : one where the Guest is already
2007-07-26 21:41:04 +04:00
* in the kernel , and a more complex one where the Guest is in
2009-07-31 02:03:45 +04:00
* userspace . We check the privilege level to find out .
*/
2008-01-07 16:05:32 +03:00
if ( ( cpu - > regs - > ss & 0x3 ) ! = GUEST_PL ) {
2009-07-31 02:03:45 +04:00
/*
* The Guest told us their kernel stack with the SET_STACK
* hypercall : both the virtual address and the segment .
*/
2008-01-07 16:05:35 +03:00
virtstack = cpu - > esp1 ;
ss = cpu - > ss1 ;
2007-10-22 05:03:36 +04:00
2008-01-07 16:05:37 +03:00
origstack = gstack = guest_pa ( cpu , virtstack ) ;
2009-07-31 02:03:45 +04:00
/*
* We push the old stack segment and pointer onto the new
2007-07-26 21:41:04 +04:00
* stack : when the Guest does an " iret " back from the interrupt
* handler the CPU will notice they ' re dropping privilege
2009-07-31 02:03:45 +04:00
* levels and expect these here .
*/
2008-01-18 00:19:42 +03:00
push_guest_stack ( cpu , & gstack , cpu - > regs - > ss ) ;
push_guest_stack ( cpu , & gstack , cpu - > regs - > esp ) ;
2007-07-19 12:49:23 +04:00
} else {
2007-07-26 21:41:04 +04:00
/* We're staying on the same Guest (kernel) stack. */
2008-01-07 16:05:32 +03:00
virtstack = cpu - > regs - > esp ;
ss = cpu - > regs - > ss ;
2007-10-22 05:03:36 +04:00
2008-01-07 16:05:37 +03:00
origstack = gstack = guest_pa ( cpu , virtstack ) ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*
* Remember that we never let the Guest actually disable interrupts , so
2007-07-26 21:41:04 +04:00
* the " Interrupt Flag " bit is always set . We copy that bit from the
2007-10-25 09:02:50 +04:00
* Guest ' s " irq_enabled " field into the eflags word : we saw the Guest
2009-07-31 02:03:45 +04:00
* copy it back in " lguest_iret " .
*/
2008-01-07 16:05:32 +03:00
eflags = cpu - > regs - > eflags ;
2008-01-18 00:19:42 +03:00
if ( get_user ( irq_enable , & cpu - > lg - > lguest_data - > irq_enabled ) = = 0
2007-07-20 16:11:13 +04:00
& & ! ( irq_enable & X86_EFLAGS_IF ) )
eflags & = ~ X86_EFLAGS_IF ;
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*
* An interrupt is expected to push three things on the stack : the old
2007-07-26 21:41:04 +04:00
* " eflags " word , the old code segment , and the old instruction
2009-07-31 02:03:45 +04:00
* pointer .
*/
2008-01-18 00:19:42 +03:00
push_guest_stack ( cpu , & gstack , eflags ) ;
push_guest_stack ( cpu , & gstack , cpu - > regs - > cs ) ;
push_guest_stack ( cpu , & gstack , cpu - > regs - > eip ) ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* For the six traps which supply an error code, we push that, too. */
2007-07-19 12:49:23 +04:00
if ( has_err )
2008-01-18 00:19:42 +03:00
push_guest_stack ( cpu , & gstack , cpu - > regs - > errcode ) ;
2007-07-19 12:49:23 +04:00
2015-04-01 06:02:20 +03:00
/* Adjust the stack pointer and stack segment. */
2008-01-07 16:05:32 +03:00
cpu - > regs - > ss = ss ;
cpu - > regs - > esp = virtstack + ( gstack - origstack ) ;
2015-04-01 06:02:20 +03:00
}
/*
 * This actually makes the Guest start executing the given interrupt/trap
 * handler.
 *
 * "lo" and "hi" are the two 32-bit halves of the Interrupt Descriptor Table
 * entry for this interrupt or trap.
 */
static void guest_run_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi)
{
	/*
	 * If we're already in the kernel, we don't change stacks.  Otherwise
	 * the stack segment register must receive the Guest's kernel stack
	 * *segment* (ss1); esp1 is a stack pointer, not a selector.
	 */
	if ((cpu->regs->ss & 0x3) != GUEST_PL)
		cpu->regs->ss = cpu->ss1;

	/* Set the code segment and the address to execute. */
	cpu->regs->cs = (__KERNEL_CS | GUEST_PL);
	cpu->regs->eip = idt_address(lo, hi);

	/*
	 * Trapping always clears these flags:
	 * TF: Trap flag
	 * VM: Virtual 8086 mode
	 * RF: Resume
	 * NT: Nested task.
	 */
	cpu->regs->eflags &=
		~(X86_EFLAGS_TF | X86_EFLAGS_VM | X86_EFLAGS_RF | X86_EFLAGS_NT);

	/*
	 * There are two kinds of interrupt handlers: 0xE is an "interrupt
	 * gate" which expects interrupts to be disabled on entry, so clear the
	 * Guest's irq_enabled field for it.  Failing to write that field is
	 * fatal for the Guest.
	 */
	if (idt_type(lo, hi) == 0xE) {
		if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
			kill_guest(cpu, " Disabling interrupts ");
	}
}
2015-04-01 06:02:20 +03:00
/* This restores the eflags word which was pushed on the stack by a trap */
static void restore_eflags ( struct lg_cpu * cpu )
{
/* This is the physical address of the stack. */
unsigned long stack_pa = guest_pa ( cpu , cpu - > regs - > esp ) ;
/*
* Stack looks like this :
* Address Contents
* esp EIP
* esp + 4 CS
* esp + 8 EFLAGS
*/
cpu - > regs - > eflags = lgread ( cpu , stack_pa + 8 , u32 ) ;
cpu - > regs - > eflags & =
~ ( X86_EFLAGS_TF | X86_EFLAGS_VM | X86_EFLAGS_RF | X86_EFLAGS_NT ) ;
}
2007-10-25 09:02:50 +04:00
/*H:205
2007-07-26 21:41:04 +04:00
* Virtual Interrupts .
*
2009-06-13 08:27:02 +04:00
* interrupt_pending ( ) returns the first pending interrupt which isn ' t blocked
* by the Guest . It is called before every entry to the Guest , and just before
2009-07-31 02:03:45 +04:00
* we go to sleep when the Guest has halted itself .
*/
2009-06-13 08:27:02 +04:00
unsigned int interrupt_pending ( struct lg_cpu * cpu , bool * more )
2007-07-19 12:49:23 +04:00
{
unsigned int irq ;
DECLARE_BITMAP ( blk , LGUEST_IRQS ) ;
2007-07-26 21:41:04 +04:00
/* If the Guest hasn't even initialized yet, we can do nothing. */
2008-01-18 00:19:42 +03:00
if ( ! cpu - > lg - > lguest_data )
2009-06-13 08:27:02 +04:00
return LGUEST_IRQS ;
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*
* Take our " irqs_pending " array and remove any interrupts the Guest
* wants blocked : the result ends up in " blk " .
*/
2008-01-18 00:19:42 +03:00
if ( copy_from_user ( & blk , cpu - > lg - > lguest_data - > blocked_interrupts ,
2007-07-19 12:49:23 +04:00
sizeof ( blk ) ) )
2009-06-13 08:27:02 +04:00
return LGUEST_IRQS ;
2008-01-07 16:05:29 +03:00
bitmap_andnot ( blk , cpu - > irqs_pending , blk , LGUEST_IRQS ) ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* Find the first interrupt. */
2007-07-19 12:49:23 +04:00
irq = find_first_bit ( blk , LGUEST_IRQS ) ;
2009-06-13 08:27:02 +04:00
* more = find_next_bit ( blk , LGUEST_IRQS , irq + 1 ) ;
2009-06-13 08:27:02 +04:00
return irq ;
}
2009-07-31 02:03:45 +04:00
/*
* This actually diverts the Guest to running an interrupt handler , once an
* interrupt has been identified by interrupt_pending ( ) .
*/
2009-06-13 08:27:02 +04:00
void try_deliver_interrupt ( struct lg_cpu * cpu , unsigned int irq , bool more )
2009-06-13 08:27:02 +04:00
{
struct desc_struct * idt ;
BUG_ON ( irq > = LGUEST_IRQS ) ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* If they're halted, interrupts restart them. */
2008-01-07 16:05:34 +03:00
if ( cpu - > halted ) {
2007-07-19 12:49:23 +04:00
/* Re-enable interrupts. */
2008-01-18 00:19:42 +03:00
if ( put_user ( X86_EFLAGS_IF , & cpu - > lg - > lguest_data - > irq_enabled ) )
kill_guest ( cpu , " Re-enabling interrupts " ) ;
2008-01-07 16:05:34 +03:00
cpu - > halted = 0 ;
2007-07-19 12:49:23 +04:00
} else {
2007-07-26 21:41:04 +04:00
/* Otherwise we check if they have interrupts disabled. */
2007-07-19 12:49:23 +04:00
u32 irq_enabled ;
2008-01-18 00:19:42 +03:00
if ( get_user ( irq_enabled , & cpu - > lg - > lguest_data - > irq_enabled ) )
2007-07-19 12:49:23 +04:00
irq_enabled = 0 ;
2009-06-13 08:27:02 +04:00
if ( ! irq_enabled ) {
/* Make sure they know an IRQ is pending. */
put_user ( X86_EFLAGS_IF ,
& cpu - > lg - > lguest_data - > irq_pending ) ;
2007-07-19 12:49:23 +04:00
return ;
2009-06-13 08:27:02 +04:00
}
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*
* Look at the IDT entry the Guest gave us for this interrupt . The
2007-07-26 21:41:04 +04:00
* first 32 ( FIRST_EXTERNAL_VECTOR ) entries are for traps , so we skip
2009-07-31 02:03:45 +04:00
* over them .
*/
2008-01-07 16:05:33 +03:00
idt = & cpu - > arch . idt [ FIRST_EXTERNAL_VECTOR + irq ] ;
2007-07-26 21:41:04 +04:00
/* If they don't have a handler (yet?), we just ignore it */
2007-07-19 12:49:23 +04:00
if ( idt_present ( idt - > a , idt - > b ) ) {
2007-07-26 21:41:04 +04:00
/* OK, mark it no longer pending and deliver it. */
2008-01-07 16:05:29 +03:00
clear_bit ( irq , cpu - > irqs_pending ) ;
2015-04-01 06:02:20 +03:00
2009-07-31 02:03:45 +04:00
/*
2015-04-01 06:02:20 +03:00
* They may be about to iret , where they asked us never to
* deliver interrupts . In this case , we can emulate that iret
* then immediately deliver the interrupt . This is basically
* a noop : the iret would pop the interrupt frame and restore
* eflags , and then we ' d set it up again . So just restore the
* eflags word and jump straight to the handler in this case .
*
* Denys Vlasenko points out that this isn ' t quite right : if
* the iret was returning to userspace , then that interrupt
* would reset the stack pointer ( which the Guest told us
* about via LHCALL_SET_STACK ) . But unless the Guest is being
* * really * weird , that will be the same as the current stack
* anyway .
2009-07-31 02:03:45 +04:00
*/
2015-04-01 06:02:20 +03:00
if ( cpu - > regs - > eip = = cpu - > lg - > noirq_iret ) {
restore_eflags ( cpu ) ;
} else {
/*
* set_guest_interrupt ( ) takes a flag to say whether
* this interrupt pushes an error code onto the stack
* as well : virtual interrupts never do .
*/
push_guest_interrupt_stack ( cpu , false ) ;
}
/* Actually make Guest cpu jump to handler. */
guest_run_interrupt ( cpu , idt - > a , idt - > b ) ;
2007-07-19 12:49:23 +04:00
}
2007-07-27 07:42:52 +04:00
2009-07-31 02:03:45 +04:00
/*
* Every time we deliver an interrupt , we update the timestamp in the
2007-07-27 07:42:52 +04:00
* Guest ' s lguest_data struct . It would be better for the Guest if we
* did this more often , but it can actually be quite slow : doing it
* here is a compromise which means at least it gets updated every
2009-07-31 02:03:45 +04:00
* timer interrupt .
*/
2008-01-18 00:19:42 +03:00
write_timestamp ( cpu ) ;
2009-06-13 08:27:02 +04:00
2009-07-31 02:03:45 +04:00
/*
* If there are no other interrupts we want to deliver , clear
* the pending flag .
*/
2009-06-13 08:27:02 +04:00
if ( ! more )
put_user ( 0 , & cpu - > lg - > lguest_data - > irq_pending ) ;
2007-07-19 12:49:23 +04:00
}
2009-06-13 08:27:08 +04:00
/* And this is the routine when we want to set an interrupt for the Guest. */
void set_interrupt ( struct lg_cpu * cpu , unsigned int irq )
{
2009-07-31 02:03:45 +04:00
/*
* Next time the Guest runs , the core code will see if it can deliver
* this interrupt .
*/
2009-06-13 08:27:08 +04:00
set_bit ( irq , cpu - > irqs_pending ) ;
2009-07-31 02:03:45 +04:00
/*
* Make sure it sees it ; it might be asleep ( eg . halted ) , or running
* the Guest right now , in which case kick_process ( ) will knock it out .
*/
2009-06-13 08:27:08 +04:00
if ( ! wake_up_process ( cpu - > tsk ) )
kick_process ( cpu - > tsk ) ;
}
2007-10-22 05:03:35 +04:00
/*:*/
2009-07-31 02:03:45 +04:00
/*
* Linux uses trap 128 for system calls . Plan9 uses 64 , and Ron Minnich sent
2007-10-22 05:03:35 +04:00
* me a patch , so we support that too . It ' d be a big step for lguest if half
* the Plan 9 user base were to start using it .
*
* Actually now I think of it , it ' s possible that Ron * is * half the Plan 9
2009-07-31 02:03:45 +04:00
* userbase . Oh well .
*/
2007-10-22 05:03:35 +04:00
static bool could_be_syscall ( unsigned int num )
{
2015-05-11 08:17:04 +03:00
/* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */
return num = = IA32_SYSCALL_VECTOR | | num = = syscall_vector ;
2007-10-22 05:03:35 +04:00
}
/* The syscall vector it wants must be unused by Host. */
bool check_syscall_vector ( struct lguest * lg )
{
u32 vector ;
if ( get_user ( vector , & lg - > lguest_data - > syscall_vec ) )
return false ;
return could_be_syscall ( vector ) ;
}
int init_interrupts ( void )
{
/* If they want some strange system call vector, reserve it now */
2015-05-11 08:17:04 +03:00
if ( syscall_vector ! = IA32_SYSCALL_VECTOR ) {
2008-12-20 02:23:44 +03:00
if ( test_bit ( syscall_vector , used_vectors ) | |
vector_used_by_percpu_irq ( syscall_vector ) ) {
printk ( KERN_ERR " lg: couldn't reserve syscall %u \n " ,
syscall_vector ) ;
return - EBUSY ;
}
set_bit ( syscall_vector , used_vectors ) ;
2007-10-22 05:03:35 +04:00
}
2008-12-20 02:23:44 +03:00
2007-10-22 05:03:35 +04:00
return 0 ;
}
void free_interrupts ( void )
{
2015-05-11 08:17:04 +03:00
if ( syscall_vector ! = IA32_SYSCALL_VECTOR )
2007-10-22 05:03:35 +04:00
clear_bit ( syscall_vector , used_vectors ) ;
}
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*H:220
 * Now we've got the routines to deliver interrupts, delivering traps like
 * page fault is easy.  The only trick is that Intel decided that some traps
 * should have error codes: traps 8, 10 through 14, and 17.
 */
static bool has_err(unsigned int trap)
{
	switch (trap) {
	case 8:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 17:
		return true;
	default:
		return false;
	}
}
2007-07-26 21:41:04 +04:00
/* deliver_trap() returns true if it could deliver the trap. */
2009-03-18 19:38:35 +03:00
bool deliver_trap ( struct lg_cpu * cpu , unsigned int num )
2007-07-19 12:49:23 +04:00
{
2009-07-31 02:03:45 +04:00
/*
* Trap numbers are always 8 bit , but we set an impossible trap number
* for traps inside the Switcher , so check that here .
*/
2008-01-07 16:05:33 +03:00
if ( num > = ARRAY_SIZE ( cpu - > arch . idt ) )
2009-03-18 19:38:35 +03:00
return false ;
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*
* Early on the Guest hasn ' t set the IDT entries ( or maybe it put a
* bogus one in ) : if we fail here , the Guest will be killed .
*/
2008-01-07 16:05:33 +03:00
if ( ! idt_present ( cpu - > arch . idt [ num ] . a , cpu - > arch . idt [ num ] . b ) )
2009-03-18 19:38:35 +03:00
return false ;
2015-04-01 06:02:20 +03:00
push_guest_interrupt_stack ( cpu , has_err ( num ) ) ;
guest_run_interrupt ( cpu , cpu - > arch . idt [ num ] . a ,
cpu - > arch . idt [ num ] . b ) ;
2009-03-18 19:38:35 +03:00
return true ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*H:250
* Here ' s the hard part : returning to the Host every time a trap happens
2007-07-26 21:41:04 +04:00
* and then calling deliver_trap ( ) and re - entering the Guest is slow .
2007-10-25 09:02:50 +04:00
* Particularly because Guest userspace system calls are traps ( usually trap
* 128 ) .
2007-07-26 21:41:04 +04:00
*
* So we ' d like to set up the IDT to tell the CPU to deliver traps directly
* into the Guest . This is possible , but the complexities cause the size of
* this file to double ! However , 150 lines of code is worth writing for taking
* system calls down from 1750 ns to 270 ns . Plus , if lguest didn ' t do it , all
2007-10-25 09:02:50 +04:00
* the other hypervisors would beat it up at lunchtime .
2007-07-26 21:41:04 +04:00
*
2007-10-22 05:03:28 +04:00
* This routine indicates if a particular trap number could be delivered
2009-07-31 02:03:45 +04:00
* directly .
*/
2009-03-18 19:38:35 +03:00
static bool direct_trap ( unsigned int num )
2007-07-19 12:49:23 +04:00
{
2009-07-31 02:03:45 +04:00
/*
* Hardware interrupts don ' t go to the Guest at all ( except system
* call ) .
*/
2007-10-22 05:03:35 +04:00
if ( num > = FIRST_EXTERNAL_VECTOR & & ! could_be_syscall ( num ) )
2009-03-18 19:38:35 +03:00
return false ;
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*
* The Host needs to see page faults ( for shadow paging and to save the
2007-07-26 21:41:04 +04:00
* fault address ) , general protection faults ( in / out emulation ) and
2011-07-22 09:09:49 +04:00
* device not available ( TS handling ) and of course , the hypercall trap .
2009-07-31 02:03:45 +04:00
*/
2011-07-22 09:09:49 +04:00
return num ! = 14 & & num ! = 13 & & num ! = 7 & & num ! = LGUEST_TRAP_ENTRY ;
2007-07-19 12:49:23 +04:00
}
2007-07-26 21:41:05 +04:00
/*:*/
2009-07-31 02:03:45 +04:00
/*M:005
 * The Guest has the ability to turn its interrupt gates into trap gates,
 * if it is careful.  The Host will let trap gates go directly to the
 * Guest, but the Guest needs the interrupts atomically disabled for an
 * interrupt gate.  The Host could provide a mechanism to register more
 * "no-interrupt" regions, and the Guest could point the trap gate at
 * instructions within that region, where it can safely disable interrupts.
 */
2007-07-26 21:41:05 +04:00
2009-07-31 02:03:45 +04:00
/*M:006
 * The Guests do not use the sysenter (fast system call) instruction,
 * because it's hardcoded to enter privilege level 0 and so can't go direct.
 * It's about twice as fast as the older "int 0x80" system call, so it might
 * still be worthwhile to handle it in the Switcher and lcall down to the
 * Guest.  The sysenter semantics are hairy tho: search for that keyword in
 * entry.S
:*/
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*H:260
* When we make traps go directly into the Guest , we need to make sure
2007-07-26 21:41:04 +04:00
* the kernel stack is valid ( ie . mapped in the page tables ) . Otherwise , the
* CPU trying to deliver the trap will fault while trying to push the interrupt
* words on the stack : this is called a double fault , and it forces us to kill
* the Guest .
*
2009-07-31 02:03:45 +04:00
* Which is deeply unfair , because ( literally ! ) it wasn ' t the Guests ' fault .
*/
2008-01-07 16:05:35 +03:00
void pin_stack_pages ( struct lg_cpu * cpu )
2007-07-19 12:49:23 +04:00
{
unsigned int i ;
2009-07-31 02:03:45 +04:00
/*
* Depending on the CONFIG_4KSTACKS option , the Guest can have one or
* two pages of stack space .
*/
2008-01-18 00:19:42 +03:00
for ( i = 0 ; i < cpu - > lg - > stack_pages ; i + + )
2009-07-31 02:03:45 +04:00
/*
* The stack grows * upwards * , so the address we ' re given is the
2007-08-30 00:35:08 +04:00
* start of the page after the kernel stack . Subtract one to
* get back onto the first stack page , and keep subtracting to
2009-07-31 02:03:45 +04:00
* get to the rest of the stack pages .
*/
2008-01-07 16:05:37 +03:00
pin_page ( cpu , cpu - > esp1 - 1 - i * PAGE_SIZE ) ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*
* Direct traps also mean that we need to know whenever the Guest wants to use
2011-07-22 09:09:50 +04:00
* a different kernel stack , so we can change the guest TSS to use that
* stack . The TSS entries expect a virtual address , so unlike most addresses
2007-07-26 21:41:04 +04:00
* the Guest gives us , the " esp " ( stack pointer ) value here is virtual , not
* physical .
*
* In Linux each process has its own kernel stack , so this happens a lot : we
2009-07-31 02:03:45 +04:00
* change stacks on each context switch .
*/
2008-01-07 16:05:35 +03:00
void guest_set_stack ( struct lg_cpu * cpu , u32 seg , u32 esp , unsigned int pages )
2007-07-19 12:49:23 +04:00
{
2009-07-31 02:03:45 +04:00
/*
* You ' re not allowed a stack segment with privilege level 0 : bad Guest !
*/
2007-07-19 12:49:23 +04:00
if ( ( seg & 0x3 ) ! = GUEST_PL )
2008-01-18 00:19:42 +03:00
kill_guest ( cpu , " bad stack segment %i " , seg ) ;
2007-07-26 21:41:04 +04:00
/* We only expect one or two stack pages. */
2007-07-19 12:49:23 +04:00
if ( pages > 2 )
2008-01-18 00:19:42 +03:00
kill_guest ( cpu , " bad stack pages %u " , pages ) ;
2007-07-26 21:41:04 +04:00
/* Save where the stack is, and how many pages */
2008-01-07 16:05:35 +03:00
cpu - > ss1 = seg ;
cpu - > esp1 = esp ;
cpu - > lg - > stack_pages = pages ;
2007-07-26 21:41:04 +04:00
/* Make sure the new stack pages are mapped */
2008-01-07 16:05:35 +03:00
pin_stack_pages ( cpu ) ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*
* All this reference to mapping stacks leads us neatly into the other complex
* part of the Host : page table handling .
*/
2007-07-26 21:41:04 +04:00
2009-07-31 02:03:45 +04:00
/*H:235
* This is the routine which actually checks the Guest ' s IDT entry and
* transfers it into the entry in " struct lguest " :
*/
2008-01-18 00:19:42 +03:00
static void set_trap ( struct lg_cpu * cpu , struct desc_struct * trap ,
2007-07-19 12:49:23 +04:00
unsigned int num , u32 lo , u32 hi )
{
u8 type = idt_type ( lo , hi ) ;
2007-07-26 21:41:04 +04:00
/* We zero-out a not-present entry */
2007-07-19 12:49:23 +04:00
if ( ! idt_present ( lo , hi ) ) {
trap - > a = trap - > b = 0 ;
return ;
}
2007-07-26 21:41:04 +04:00
/* We only support interrupt and trap gates. */
2007-07-19 12:49:23 +04:00
if ( type ! = 0xE & & type ! = 0xF )
2008-01-18 00:19:42 +03:00
kill_guest ( cpu , " bad IDT type %i " , type ) ;
2007-07-19 12:49:23 +04:00
2009-07-31 02:03:45 +04:00
/*
* We only copy the handler address , present bit , privilege level and
2007-07-26 21:41:04 +04:00
* type . The privilege level controls where the trap can be triggered
* manually with an " int " instruction . This is usually GUEST_PL ,
2009-07-31 02:03:45 +04:00
* except for system calls which userspace can use .
*/
2007-07-19 12:49:23 +04:00
trap - > a = ( ( __KERNEL_CS | GUEST_PL ) < < 16 ) | ( lo & 0x0000FFFF ) ;
trap - > b = ( hi & 0xFFFFEF00 ) ;
}
2009-07-31 02:03:45 +04:00
/*H:230
* While we ' re here , dealing with delivering traps and interrupts to the
2007-07-26 21:41:04 +04:00
* Guest , we might as well complete the picture : how the Guest tells us where
* it wants them to go . This would be simple , except making traps fast
* requires some tricks .
*
* We saw the Guest setting Interrupt Descriptor Table ( IDT ) entries with the
2009-07-31 02:03:45 +04:00
* LHCALL_LOAD_IDT_ENTRY hypercall before : that comes here .
*/
2008-01-07 16:05:33 +03:00
void load_guest_idt_entry ( struct lg_cpu * cpu , unsigned int num , u32 lo , u32 hi )
2007-07-19 12:49:23 +04:00
{
2009-07-31 02:03:45 +04:00
/*
* Guest never handles : NMI , doublefault , spurious interrupt or
* hypercall . We ignore when it tries to set them .
*/
2007-07-19 12:49:23 +04:00
if ( num = = 2 | | num = = 8 | | num = = 15 | | num = = LGUEST_TRAP_ENTRY )
return ;
2009-07-31 02:03:45 +04:00
/*
* Mark the IDT as changed : next time the Guest runs we ' ll know we have
* to copy this again .
*/
2008-01-18 00:14:46 +03:00
cpu - > changed | = CHANGED_IDT ;
2007-07-26 21:41:04 +04:00
2007-10-22 05:03:28 +04:00
/* Check that the Guest doesn't try to step outside the bounds. */
2008-01-07 16:05:33 +03:00
if ( num > = ARRAY_SIZE ( cpu - > arch . idt ) )
2008-01-18 00:19:42 +03:00
kill_guest ( cpu , " Setting idt entry %u " , num ) ;
2007-10-22 05:03:28 +04:00
else
2008-01-18 00:19:42 +03:00
set_trap ( cpu , & cpu - > arch . idt [ num ] , num , lo , hi ) ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*
* The default entry for each interrupt points into the Switcher routines which
2007-07-26 21:41:04 +04:00
* simply return to the Host . The run_guest ( ) loop will then call
2009-07-31 02:03:45 +04:00
* deliver_trap ( ) to bounce it back into the Guest .
*/
2007-07-19 12:49:23 +04:00
static void default_idt_entry ( struct desc_struct * idt ,
int trap ,
2008-07-29 18:58:31 +04:00
const unsigned long handler ,
const struct desc_struct * base )
2007-07-19 12:49:23 +04:00
{
2007-07-26 21:41:04 +04:00
/* A present interrupt gate. */
2007-07-19 12:49:23 +04:00
u32 flags = 0x8e00 ;
2009-07-31 02:03:45 +04:00
/*
* Set the privilege level on the entry for the hypercall : this allows
* the Guest to use the " int " instruction to trigger it .
*/
2007-07-19 12:49:23 +04:00
if ( trap = = LGUEST_TRAP_ENTRY )
flags | = ( GUEST_PL < < 13 ) ;
2008-07-29 18:58:31 +04:00
else if ( base )
2009-07-31 02:03:45 +04:00
/*
* Copy privilege level from what Guest asked for . This allows
* debug ( int 3 ) traps from Guest userspace , for example .
*/
2008-07-29 18:58:31 +04:00
flags | = ( base - > b & 0x6000 ) ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* Now pack it into the IDT entry in its weird format. */
2007-07-19 12:49:23 +04:00
idt - > a = ( LGUEST_CS < < 16 ) | ( handler & 0x0000FFFF ) ;
idt - > b = ( handler & 0xFFFF0000 ) | flags ;
}
2007-07-26 21:41:04 +04:00
/* When the Guest first starts, we put default entries into the IDT. */
2007-07-19 12:49:23 +04:00
void setup_default_idt_entries ( struct lguest_ro_state * state ,
const unsigned long * def )
{
unsigned int i ;
for ( i = 0 ; i < ARRAY_SIZE ( state - > guest_idt ) ; i + + )
2008-07-29 18:58:31 +04:00
default_idt_entry ( & state - > guest_idt [ i ] , i , def [ i ] , NULL ) ;
2007-07-19 12:49:23 +04:00
}
2009-07-31 02:03:45 +04:00
/*H:240
* We don ' t use the IDT entries in the " struct lguest " directly , instead
2007-07-26 21:41:04 +04:00
* we copy them into the IDT which we ' ve set up for Guests on this CPU , just
2009-07-31 02:03:45 +04:00
* before we run the Guest . This routine does that copy .
*/
2008-01-07 16:05:33 +03:00
void copy_traps ( const struct lg_cpu * cpu , struct desc_struct * idt ,
2007-07-19 12:49:23 +04:00
const unsigned long * def )
{
unsigned int i ;
2009-07-31 02:03:45 +04:00
/*
* We can simply copy the direct traps , otherwise we use the default
* ones in the Switcher : they will return to the Host .
*/
2008-01-07 16:05:33 +03:00
for ( i = 0 ; i < ARRAY_SIZE ( cpu - > arch . idt ) ; i + + ) {
2008-07-29 18:58:31 +04:00
const struct desc_struct * gidt = & cpu - > arch . idt [ i ] ;
2007-10-22 05:03:28 +04:00
/* If no Guest can ever override this trap, leave it alone. */
if ( ! direct_trap ( i ) )
continue ;
2009-07-31 02:03:45 +04:00
/*
* Only trap gates ( type 15 ) can go direct to the Guest .
2007-10-22 05:03:28 +04:00
* Interrupt gates ( type 14 ) disable interrupts as they are
* entered , which we never let the Guest do . Not present
2008-07-29 18:58:31 +04:00
* entries ( type 0x0 ) also can ' t go direct , of course .
*
* If it can ' t go direct , we still need to copy the priv . level :
* they might want to give userspace access to a software
2009-07-31 02:03:45 +04:00
* interrupt .
*/
2008-07-29 18:58:31 +04:00
if ( idt_type ( gidt - > a , gidt - > b ) = = 0xF )
idt [ i ] = * gidt ;
2007-07-19 12:49:23 +04:00
else
2008-07-29 18:58:31 +04:00
default_idt_entry ( & idt [ i ] , i , def [ i ] , gidt ) ;
2007-07-19 12:49:23 +04:00
}
}
2007-10-25 09:02:50 +04:00
/*H:200
* The Guest Clock .
*
* There are two sources of virtual interrupts . We saw one in lguest_user . c :
* the Launcher sending interrupts for virtual devices . The other is the Guest
* timer interrupt .
*
* The Guest uses the LHCALL_SET_CLOCKEVENT hypercall to tell us how long to
* the next timer interrupt ( in nanoseconds ) . We use the high - resolution timer
* infrastructure to set a callback at that time .
*
2009-07-31 02:03:45 +04:00
* 0 means " turn off the clock " .
*/
2008-01-07 16:05:28 +03:00
void guest_set_clockevent ( struct lg_cpu * cpu , unsigned long delta )
2007-07-19 12:49:23 +04:00
{
ktime_t expires ;
if ( unlikely ( delta = = 0 ) ) {
/* Clock event device is shutting down. */
2008-01-07 16:05:28 +03:00
hrtimer_cancel ( & cpu - > hrt ) ;
2007-07-19 12:49:23 +04:00
return ;
}
2009-07-31 02:03:45 +04:00
/*
* We use wallclock time here , so the Guest might not be running for
2007-10-25 09:02:50 +04:00
* all the time between now and the timer interrupt it asked for . This
2009-07-31 02:03:45 +04:00
* is almost always the right thing to do .
*/
2007-07-19 12:49:23 +04:00
expires = ktime_add_ns ( ktime_get_real ( ) , delta ) ;
2008-01-07 16:05:28 +03:00
hrtimer_start ( & cpu - > hrt , expires , HRTIMER_MODE_ABS ) ;
2007-07-19 12:49:23 +04:00
}
2007-10-25 09:02:50 +04:00
/* This is the function called when the Guest's timer expires. */
2007-07-19 12:49:23 +04:00
static enum hrtimer_restart clockdev_fn ( struct hrtimer * timer )
{
2008-01-07 16:05:28 +03:00
struct lg_cpu * cpu = container_of ( timer , struct lg_cpu , hrt ) ;
2007-07-19 12:49:23 +04:00
2007-10-25 09:02:50 +04:00
/* Remember the first interrupt is the timer interrupt. */
2009-06-13 08:27:08 +04:00
set_interrupt ( cpu , 0 ) ;
2007-07-19 12:49:23 +04:00
return HRTIMER_NORESTART ;
}
2007-10-25 09:02:50 +04:00
/* This sets up the timer for this Guest. */
2008-01-07 16:05:28 +03:00
void init_clockdev ( struct lg_cpu * cpu )
2007-07-19 12:49:23 +04:00
{
2008-01-07 16:05:28 +03:00
hrtimer_init ( & cpu - > hrt , CLOCK_REALTIME , HRTIMER_MODE_ABS ) ;
cpu - > hrt . function = clockdev_fn ;
2007-07-19 12:49:23 +04:00
}