2007-07-26 21:41:02 +04:00
/*P:800 Interrupts (traps) are complicated enough to earn their own file.
* There are three classes of interrupts :
*
* 1 ) Real hardware interrupts which occur while we ' re running the Guest ,
* 2 ) Interrupts for virtual devices attached to the Guest , and
* 3 ) Traps and faults from the Guest .
*
* Real hardware interrupts must be delivered to the Host , not the Guest .
* Virtual interrupts must be delivered to the Guest , but we make them look
* just like real hardware would deliver them . Traps from the Guest can be set
* up to go directly back into the Guest , but sometimes the Host wants to see
* them first , so we also have a way of " reflecting " them into the Guest as if
* they had been delivered to it directly . : */
2007-07-19 12:49:23 +04:00
# include <linux/uaccess.h>
2007-10-22 05:03:35 +04:00
# include <linux/interrupt.h>
# include <linux/module.h>
2007-07-19 12:49:23 +04:00
# include "lg.h"
2007-10-22 05:03:35 +04:00
/*
 * Allow Guests to use a non-128 (ie. non-Linux) syscall trap.
 *
 * The vector defaults to SYSCALL_VECTOR and is exposed as the
 * "syscall_vector" module parameter (type uint, permissions 0444).
 */
static unsigned int syscall_vector = SYSCALL_VECTOR ;
module_param ( syscall_vector , uint , 0444 ) ;
2007-07-26 21:41:04 +04:00
/* The address of the interrupt handler is split into two bits: */
2007-07-19 12:49:23 +04:00
static unsigned long idt_address ( u32 lo , u32 hi )
{
return ( lo & 0x0000FFFF ) | ( hi & 0xFFFF0000 ) ;
}
2007-07-26 21:41:04 +04:00
/* The "type" of the interrupt handler is a 4 bit field: we only support a
* couple of types . */
2007-07-19 12:49:23 +04:00
static int idt_type ( u32 lo , u32 hi )
{
return ( hi > > 8 ) & 0xF ;
}
2007-07-26 21:41:04 +04:00
/* An IDT entry can't be used unless the "present" bit is set. */
2009-03-18 19:38:35 +03:00
static bool idt_present ( u32 lo , u32 hi )
2007-07-19 12:49:23 +04:00
{
return ( hi & 0x8000 ) ;
}
2007-07-26 21:41:04 +04:00
/* We need a helper to "push" a value onto the Guest's stack, since that's a
* big part of what delivering an interrupt does . */
2008-01-18 00:19:42 +03:00
static void push_guest_stack ( struct lg_cpu * cpu , unsigned long * gstack , u32 val )
2007-07-19 12:49:23 +04:00
{
2007-07-26 21:41:04 +04:00
/* Stack grows upwards: move stack then write value. */
2007-07-19 12:49:23 +04:00
* gstack - = 4 ;
2008-01-18 00:19:42 +03:00
lgwrite ( cpu , * gstack , u32 , val ) ;
2007-07-19 12:49:23 +04:00
}
2007-07-26 21:41:04 +04:00
/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
* trap . The mechanics of delivering traps and interrupts to the Guest are the
* same , except some traps have an " error code " which gets pushed onto the
* stack as well : the caller tells us if this is one .
*
* " lo " and " hi " are the two parts of the Interrupt Descriptor Table for this
* interrupt or trap . It ' s split into two parts for traditional reasons : gcc
* on i386 used to be frightened by 64 bit numbers .
*
* We set up the stack just like the CPU does for a real interrupt , so it ' s
* identical for the Guest ( and the standard " iret " instruction will undo
* it ) . */
2009-03-18 19:38:35 +03:00
/*
 * set_guest_interrupt - divert the Guest into one of its own handlers
 * @cpu: the Guest CPU whose saved registers and stack get rewritten
 * @lo: first word of the Guest's IDT entry for this vector
 * @hi: second word of the Guest's IDT entry for this vector
 * @has_err: true if cpu->regs->errcode must be pushed as well
 *
 * Pushes the old state onto the Guest stack, then points ss:esp at the
 * resulting stack top and cs:eip at the handler taken from the IDT entry.
 */
static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
				bool has_err)
{
	unsigned long gstack, origstack;
	unsigned long virtstack;
	u32 eflags, ss, irq_enable;
	/* A stack segment which is not at GUEST_PL means Guest userspace. */
	bool from_user = (cpu->regs->ss & 0x3) != GUEST_PL;

	if (from_user) {
		/*
		 * Switch to the kernel stack (virtual address and segment)
		 * which was recorded in esp1/ss1.
		 */
		virtstack = cpu->esp1;
		ss = cpu->ss1;
	} else {
		/* Already in the Guest kernel: keep using the current stack. */
		virtstack = cpu->regs->esp;
		ss = cpu->regs->ss;
	}

	origstack = gstack = guest_pa(cpu, virtstack);

	if (from_user) {
		/*
		 * The old stack segment and pointer go onto the new stack
		 * first: the "iret" returning from the handler drops
		 * privilege level and expects to find them there.
		 */
		push_guest_stack(cpu, &gstack, cpu->regs->ss);
		push_guest_stack(cpu, &gstack, cpu->regs->esp);
	}

	/*
	 * The Guest's notion of "interrupts enabled" lives in the
	 * irq_enabled field of lguest_data rather than in the real eflags.
	 * If we can read that field and it has the Interrupt Flag clear,
	 * clear the flag in the eflags word we are about to push too.
	 */
	eflags = cpu->regs->eflags;
	if (!get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled)
	    && (irq_enable & X86_EFLAGS_IF) == 0)
		eflags &= ~X86_EFLAGS_IF;

	/* The standard frame: old eflags, old code segment, old eip. */
	push_guest_stack(cpu, &gstack, eflags);
	push_guest_stack(cpu, &gstack, cpu->regs->cs);
	push_guest_stack(cpu, &gstack, cpu->regs->eip);

	/* Traps which supply an error code get that pushed last. */
	if (has_err)
		push_guest_stack(cpu, &gstack, cpu->regs->errcode);

	/*
	 * Everything old is saved: now switch stack, code segment and
	 * instruction pointer.  gstack was moved in Guest-physical terms, so
	 * apply the same displacement to the virtual stack address.
	 */
	cpu->regs->ss = ss;
	cpu->regs->esp = virtstack + (gstack - origstack);
	cpu->regs->cs = (__KERNEL_CS | GUEST_PL);
	cpu->regs->eip = idt_address(lo, hi);

	/*
	 * Type 0xE is an "interrupt gate", which expects interrupts to be
	 * disabled on entry: do that by zeroing the Guest's irq_enabled.
	 */
	if (idt_type(lo, hi) == 0xE
	    && put_user(0, &cpu->lg->lguest_data->irq_enabled))
		kill_guest(cpu, " Disabling interrupts ");
}
2007-10-25 09:02:50 +04:00
/*H:205
2007-07-26 21:41:04 +04:00
* Virtual Interrupts .
*
* maybe_do_interrupt ( ) gets called before every entry to the Guest , to see if
* we should divert the Guest to running an interrupt handler . */
2008-01-07 16:05:29 +03:00
void maybe_do_interrupt ( struct lg_cpu * cpu )
2007-07-19 12:49:23 +04:00
{
unsigned int irq ;
DECLARE_BITMAP ( blk , LGUEST_IRQS ) ;
struct desc_struct * idt ;
2007-07-26 21:41:04 +04:00
/* If the Guest hasn't even initialized yet, we can do nothing. */
2008-01-18 00:19:42 +03:00
if ( ! cpu - > lg - > lguest_data )
2007-07-19 12:49:23 +04:00
return ;
2007-07-26 21:41:04 +04:00
/* Take our "irqs_pending" array and remove any interrupts the Guest
* wants blocked : the result ends up in " blk " . */
2008-01-18 00:19:42 +03:00
if ( copy_from_user ( & blk , cpu - > lg - > lguest_data - > blocked_interrupts ,
2007-07-19 12:49:23 +04:00
sizeof ( blk ) ) )
return ;
2008-01-07 16:05:29 +03:00
bitmap_andnot ( blk , cpu - > irqs_pending , blk , LGUEST_IRQS ) ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* Find the first interrupt. */
2007-07-19 12:49:23 +04:00
irq = find_first_bit ( blk , LGUEST_IRQS ) ;
2007-07-26 21:41:04 +04:00
/* None? Nothing to do */
2007-07-19 12:49:23 +04:00
if ( irq > = LGUEST_IRQS )
return ;
2007-07-26 21:41:04 +04:00
/* They may be in the middle of an iret, where they asked us never to
* deliver interrupts . */
2008-01-18 00:19:42 +03:00
if ( cpu - > regs - > eip > = cpu - > lg - > noirq_start & &
( cpu - > regs - > eip < cpu - > lg - > noirq_end ) )
2007-07-19 12:49:23 +04:00
return ;
2007-07-26 21:41:04 +04:00
/* If they're halted, interrupts restart them. */
2008-01-07 16:05:34 +03:00
if ( cpu - > halted ) {
2007-07-19 12:49:23 +04:00
/* Re-enable interrupts. */
2008-01-18 00:19:42 +03:00
if ( put_user ( X86_EFLAGS_IF , & cpu - > lg - > lguest_data - > irq_enabled ) )
kill_guest ( cpu , " Re-enabling interrupts " ) ;
2008-01-07 16:05:34 +03:00
cpu - > halted = 0 ;
2007-07-19 12:49:23 +04:00
} else {
2007-07-26 21:41:04 +04:00
/* Otherwise we check if they have interrupts disabled. */
2007-07-19 12:49:23 +04:00
u32 irq_enabled ;
2008-01-18 00:19:42 +03:00
if ( get_user ( irq_enabled , & cpu - > lg - > lguest_data - > irq_enabled ) )
2007-07-19 12:49:23 +04:00
irq_enabled = 0 ;
if ( ! irq_enabled )
return ;
}
2007-07-26 21:41:04 +04:00
/* Look at the IDT entry the Guest gave us for this interrupt. The
* first 32 ( FIRST_EXTERNAL_VECTOR ) entries are for traps , so we skip
* over them . */
2008-01-07 16:05:33 +03:00
idt = & cpu - > arch . idt [ FIRST_EXTERNAL_VECTOR + irq ] ;
2007-07-26 21:41:04 +04:00
/* If they don't have a handler (yet?), we just ignore it */
2007-07-19 12:49:23 +04:00
if ( idt_present ( idt - > a , idt - > b ) ) {
2007-07-26 21:41:04 +04:00
/* OK, mark it no longer pending and deliver it. */
2008-01-07 16:05:29 +03:00
clear_bit ( irq , cpu - > irqs_pending ) ;
2007-07-26 21:41:04 +04:00
/* set_guest_interrupt() takes the interrupt descriptor and a
* flag to say whether this interrupt pushes an error code onto
* the stack as well : virtual interrupts never do . */
2009-03-18 19:38:35 +03:00
set_guest_interrupt ( cpu , idt - > a , idt - > b , false ) ;
2007-07-19 12:49:23 +04:00
}
2007-07-27 07:42:52 +04:00
/* Every time we deliver an interrupt, we update the timestamp in the
* Guest ' s lguest_data struct . It would be better for the Guest if we
* did this more often , but it can actually be quite slow : doing it
* here is a compromise which means at least it gets updated every
* timer interrupt . */
2008-01-18 00:19:42 +03:00
write_timestamp ( cpu ) ;
2007-07-19 12:49:23 +04:00
}
2007-10-22 05:03:35 +04:00
/*:*/
/* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent
* me a patch , so we support that too . It ' d be a big step for lguest if half
* the Plan 9 user base were to start using it .
*
* Actually now I think of it , it ' s possible that Ron * is * half the Plan 9
* userbase . Oh well . */
static bool could_be_syscall ( unsigned int num )
{
/* Normal Linux SYSCALL_VECTOR or reserved vector? */
return num = = SYSCALL_VECTOR | | num = = syscall_vector ;
}
/* The syscall vector it wants must be unused by Host. */
bool check_syscall_vector ( struct lguest * lg )
{
u32 vector ;
if ( get_user ( vector , & lg - > lguest_data - > syscall_vec ) )
return false ;
return could_be_syscall ( vector ) ;
}
int init_interrupts ( void )
{
/* If they want some strange system call vector, reserve it now */
2008-12-20 02:23:44 +03:00
if ( syscall_vector ! = SYSCALL_VECTOR ) {
if ( test_bit ( syscall_vector , used_vectors ) | |
vector_used_by_percpu_irq ( syscall_vector ) ) {
printk ( KERN_ERR " lg: couldn't reserve syscall %u \n " ,
syscall_vector ) ;
return - EBUSY ;
}
set_bit ( syscall_vector , used_vectors ) ;
2007-10-22 05:03:35 +04:00
}
2008-12-20 02:23:44 +03:00
2007-10-22 05:03:35 +04:00
return 0 ;
}
void free_interrupts ( void )
{
if ( syscall_vector ! = SYSCALL_VECTOR )
clear_bit ( syscall_vector , used_vectors ) ;
}
2007-07-19 12:49:23 +04:00
2008-03-28 19:05:53 +03:00
/*H:220 Now we've got the routines to deliver interrupts, delivering traps like
* page fault is easy . The only trick is that Intel decided that some traps
* should have error codes : */
2009-03-18 19:38:35 +03:00
/*
 * Does trap number @trap push an error code?  That is the case for trap 8,
 * traps 10 through 14, and trap 17.
 */
static bool has_err(unsigned int trap)
{
	switch (trap) {
	case 8:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 17:
		return true;
	default:
		return false;
	}
}
2007-07-26 21:41:04 +04:00
/* deliver_trap() returns true if it could deliver the trap. */
2009-03-18 19:38:35 +03:00
bool deliver_trap ( struct lg_cpu * cpu , unsigned int num )
2007-07-19 12:49:23 +04:00
{
2007-08-09 14:57:13 +04:00
/* Trap numbers are always 8 bit, but we set an impossible trap number
* for traps inside the Switcher , so check that here . */
2008-01-07 16:05:33 +03:00
if ( num > = ARRAY_SIZE ( cpu - > arch . idt ) )
2009-03-18 19:38:35 +03:00
return false ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* Early on the Guest hasn't set the IDT entries (or maybe it put a
* bogus one in ) : if we fail here , the Guest will be killed . */
2008-01-07 16:05:33 +03:00
if ( ! idt_present ( cpu - > arch . idt [ num ] . a , cpu - > arch . idt [ num ] . b ) )
2009-03-18 19:38:35 +03:00
return false ;
2008-01-07 16:05:33 +03:00
set_guest_interrupt ( cpu , cpu - > arch . idt [ num ] . a ,
cpu - > arch . idt [ num ] . b , has_err ( num ) ) ;
2009-03-18 19:38:35 +03:00
return true ;
2007-07-19 12:49:23 +04:00
}
2007-07-26 21:41:04 +04:00
/*H:250 Here's the hard part: returning to the Host every time a trap happens
* and then calling deliver_trap ( ) and re - entering the Guest is slow .
2007-10-25 09:02:50 +04:00
* Particularly because Guest userspace system calls are traps ( usually trap
* 128 ) .
2007-07-26 21:41:04 +04:00
*
* So we ' d like to set up the IDT to tell the CPU to deliver traps directly
* into the Guest . This is possible , but the complexities cause the size of
* this file to double ! However , 150 lines of code is worth writing for taking
* system calls down from 1750 ns to 270 ns . Plus , if lguest didn ' t do it , all
2007-10-25 09:02:50 +04:00
* the other hypervisors would beat it up at lunchtime .
2007-07-26 21:41:04 +04:00
*
2007-10-22 05:03:28 +04:00
* This routine indicates if a particular trap number could be delivered
* directly . */
2009-03-18 19:38:35 +03:00
static bool direct_trap ( unsigned int num )
2007-07-19 12:49:23 +04:00
{
2007-07-26 21:41:04 +04:00
/* Hardware interrupts don't go to the Guest at all (except system
* call ) . */
2007-10-22 05:03:35 +04:00
if ( num > = FIRST_EXTERNAL_VECTOR & & ! could_be_syscall ( num ) )
2009-03-18 19:38:35 +03:00
return false ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* The Host needs to see page faults (for shadow paging and to save the
* fault address ) , general protection faults ( in / out emulation ) and
2009-03-14 18:37:52 +03:00
* device not available ( TS handling ) , invalid opcode fault ( kvm hcall ) ,
* and of course , the hypercall trap . */
return num ! = 14 & & num ! = 13 & & num ! = 7 & &
num ! = 6 & & num ! = LGUEST_TRAP_ENTRY ;
2007-07-19 12:49:23 +04:00
}
2007-07-26 21:41:05 +04:00
/*:*/
/*M:005 The Guest has the ability to turn its interrupt gates into trap gates,
* if it is careful . The Host will let trap gates go directly to the
* Guest , but the Guest needs the interrupts atomically disabled for an
* interrupt gate . It can do this by pointing the trap gate at instructions
* within noirq_start and noirq_end , where it can safely disable interrupts . */
/*M:006 The Guests do not use the sysenter (fast system call) instruction,
* because it ' s hardcoded to enter privilege level 0 and so can ' t go direct .
* It ' s about twice as fast as the older " int 0x80 " system call , so it might
* still be worthwhile to handle it in the Switcher and lcall down to the
* Guest . The sysenter semantics are hairy tho : search for that keyword in
* entry . S : */
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/*H:260 When we make traps go directly into the Guest, we need to make sure
* the kernel stack is valid ( ie . mapped in the page tables ) . Otherwise , the
* CPU trying to deliver the trap will fault while trying to push the interrupt
* words on the stack : this is called a double fault , and it forces us to kill
* the Guest .
*
* Which is deeply unfair , because ( literally ! ) it wasn ' t the Guests ' fault . */
2008-01-07 16:05:35 +03:00
void pin_stack_pages ( struct lg_cpu * cpu )
2007-07-19 12:49:23 +04:00
{
unsigned int i ;
2007-07-26 21:41:04 +04:00
/* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
* two pages of stack space . */
2008-01-18 00:19:42 +03:00
for ( i = 0 ; i < cpu - > lg - > stack_pages ; i + + )
2007-08-30 00:35:08 +04:00
/* The stack grows *upwards*, so the address we're given is the
* start of the page after the kernel stack . Subtract one to
* get back onto the first stack page , and keep subtracting to
* get to the rest of the stack pages . */
2008-01-07 16:05:37 +03:00
pin_page ( cpu , cpu - > esp1 - 1 - i * PAGE_SIZE ) ;
2007-07-19 12:49:23 +04:00
}
2007-07-26 21:41:04 +04:00
/* Direct traps also mean that we need to know whenever the Guest wants to use
* a different kernel stack , so we can change the IDT entries to use that
* stack . The IDT entries expect a virtual address , so unlike most addresses
* the Guest gives us , the " esp " ( stack pointer ) value here is virtual , not
* physical .
*
* In Linux each process has its own kernel stack , so this happens a lot : we
* change stacks on each context switch . */
2008-01-07 16:05:35 +03:00
/*
 * guest_set_stack - record the kernel stack the Guest wants traps to use
 * @cpu: the Guest CPU making the request
 * @seg: stack segment selector; its privilege bits must equal GUEST_PL
 * @esp: virtual (not physical) address of the top of the stack
 * @pages: number of stack pages, at most 2
 *
 * An invalid request kills the Guest and is otherwise ignored: nothing is
 * stored and nothing is pinned.  This matters because pin_stack_pages()
 * loops "pages" times, and that count comes straight from the Guest.
 */
void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
{
	/*
	 * You are not allowed to have a stack segment at any privilege level
	 * other than GUEST_PL: bad Guest!
	 */
	if ((seg & 0x3) != GUEST_PL) {
		kill_guest(cpu, " bad stack segment %i ", seg);
		return;
	}

	/* We only expect one or two stack pages. */
	if (pages > 2) {
		kill_guest(cpu, " bad stack pages %u ", pages);
		return;
	}

	/* Save where the stack is, and how many pages it covers. */
	cpu->ss1 = seg;
	cpu->esp1 = esp;
	cpu->lg->stack_pages = pages;

	/* Make sure the new stack pages are mapped. */
	pin_stack_pages(cpu);
}
2007-07-26 21:41:04 +04:00
/* All this reference to mapping stacks leads us neatly into the other complex
* part of the Host : page table handling . */
/*H:235 This is the routine which actually checks the Guest's IDT entry and
2007-10-25 09:02:50 +04:00
* transfers it into the entry in " struct lguest " : */
2008-01-18 00:19:42 +03:00
/*
 * set_trap - validate a Guest IDT entry and store our sanitized copy
 * @cpu: the Guest CPU (killed if the gate type is unsupported)
 * @trap: where the sanitized entry is stored
 * @num: the vector number (not used here)
 * @lo: first word of the entry supplied by the Guest
 * @hi: second word of the entry supplied by the Guest
 */
static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
		     unsigned int num, u32 lo, u32 hi)
{
	u8 type;

	/* A not-present entry is simply stored as all zeroes. */
	if (!idt_present(lo, hi)) {
		trap->a = 0;
		trap->b = 0;
		return;
	}

	/*
	 * Only interrupt gates (0xE) and trap gates (0xF) are supported.
	 * NOTE(review): if kill_guest() returns, the entry below is still
	 * written for a rejected type - confirm that is intended.
	 */
	type = idt_type(lo, hi);
	if (!(type == 0xE || type == 0xF))
		kill_guest(cpu, " bad IDT type %i ", type);

	/*
	 * Keep only the handler address, present bit, privilege level and
	 * type, and force our own code segment.  The privilege level says
	 * who may trigger the trap manually with an "int" instruction:
	 * usually GUEST_PL, except for system calls which userspace can use.
	 */
	trap->a = ((__KERNEL_CS | GUEST_PL) << 16) | (lo & 0x0000FFFF);
	trap->b = (hi & 0xFFFFEF00);
}
2007-07-26 21:41:04 +04:00
/*H:230 While we're here, dealing with delivering traps and interrupts to the
* Guest , we might as well complete the picture : how the Guest tells us where
* it wants them to go . This would be simple , except making traps fast
* requires some tricks .
*
* We saw the Guest setting Interrupt Descriptor Table ( IDT ) entries with the
* LHCALL_LOAD_IDT_ENTRY hypercall before : that comes here . */
2008-01-07 16:05:33 +03:00
/*
 * load_guest_idt_entry - handle the Guest setting IDT entry @num
 * @cpu: the Guest CPU making the request
 * @num: vector number the Guest wants to set
 * @lo: first word of the new entry
 * @hi: second word of the new entry
 */
void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
{
	/*
	 * The Guest never handles NMI (2), doublefault (8), spurious
	 * interrupt (15) or the hypercall trap: silently ignore attempts to
	 * set those.
	 */
	if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY)
		return;

	/*
	 * Flag the IDT as changed so it gets copied again the next time the
	 * Guest runs.
	 */
	cpu->changed |= CHANGED_IDT;

	/* Don't let the Guest step outside the bounds of the table. */
	if (num >= ARRAY_SIZE(cpu->arch.idt)) {
		kill_guest(cpu, " Setting idt entry %u ", num);
		return;
	}

	set_trap(cpu, &cpu->arch.idt[num], num, lo, hi);
}
2007-07-26 21:41:04 +04:00
/* The default entry for each interrupt points into the Switcher routines which
* simply return to the Host . The run_guest ( ) loop will then call
* deliver_trap ( ) to bounce it back into the Guest . */
2007-07-19 12:49:23 +04:00
static void default_idt_entry ( struct desc_struct * idt ,
int trap ,
2008-07-29 18:58:31 +04:00
const unsigned long handler ,
const struct desc_struct * base )
2007-07-19 12:49:23 +04:00
{
2007-07-26 21:41:04 +04:00
/* A present interrupt gate. */
2007-07-19 12:49:23 +04:00
u32 flags = 0x8e00 ;
2007-07-26 21:41:04 +04:00
/* Set the privilege level on the entry for the hypercall: this allows
* the Guest to use the " int " instruction to trigger it . */
2007-07-19 12:49:23 +04:00
if ( trap = = LGUEST_TRAP_ENTRY )
flags | = ( GUEST_PL < < 13 ) ;
2008-07-29 18:58:31 +04:00
else if ( base )
/* Copy priv. level from what Guest asked for. This allows
* debug ( int 3 ) traps from Guest userspace , for example . */
flags | = ( base - > b & 0x6000 ) ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:04 +04:00
/* Now pack it into the IDT entry in its weird format. */
2007-07-19 12:49:23 +04:00
idt - > a = ( LGUEST_CS < < 16 ) | ( handler & 0x0000FFFF ) ;
idt - > b = ( handler & 0xFFFF0000 ) | flags ;
}
2007-07-26 21:41:04 +04:00
/* When the Guest first starts, we put default entries into the IDT. */
2007-07-19 12:49:23 +04:00
void setup_default_idt_entries ( struct lguest_ro_state * state ,
const unsigned long * def )
{
unsigned int i ;
for ( i = 0 ; i < ARRAY_SIZE ( state - > guest_idt ) ; i + + )
2008-07-29 18:58:31 +04:00
default_idt_entry ( & state - > guest_idt [ i ] , i , def [ i ] , NULL ) ;
2007-07-19 12:49:23 +04:00
}
2007-07-26 21:41:04 +04:00
/*H:240 We don't use the IDT entries in the "struct lguest" directly, instead
* we copy them into the IDT which we ' ve set up for Guests on this CPU , just
* before we run the Guest . This routine does that copy . */
2008-01-07 16:05:33 +03:00
void copy_traps ( const struct lg_cpu * cpu , struct desc_struct * idt ,
2007-07-19 12:49:23 +04:00
const unsigned long * def )
{
unsigned int i ;
2007-07-26 21:41:04 +04:00
/* We can simply copy the direct traps, otherwise we use the default
* ones in the Switcher : they will return to the Host . */
2008-01-07 16:05:33 +03:00
for ( i = 0 ; i < ARRAY_SIZE ( cpu - > arch . idt ) ; i + + ) {
2008-07-29 18:58:31 +04:00
const struct desc_struct * gidt = & cpu - > arch . idt [ i ] ;
2007-10-22 05:03:28 +04:00
/* If no Guest can ever override this trap, leave it alone. */
if ( ! direct_trap ( i ) )
continue ;
/* Only trap gates (type 15) can go direct to the Guest.
* Interrupt gates ( type 14 ) disable interrupts as they are
* entered , which we never let the Guest do . Not present
2008-07-29 18:58:31 +04:00
* entries ( type 0x0 ) also can ' t go direct , of course .
*
* If it can ' t go direct , we still need to copy the priv . level :
* they might want to give userspace access to a software
* interrupt . */
if ( idt_type ( gidt - > a , gidt - > b ) = = 0xF )
idt [ i ] = * gidt ;
2007-07-19 12:49:23 +04:00
else
2008-07-29 18:58:31 +04:00
default_idt_entry ( & idt [ i ] , i , def [ i ] , gidt ) ;
2007-07-19 12:49:23 +04:00
}
}
2007-10-25 09:02:50 +04:00
/*H:200
* The Guest Clock .
*
* There are two sources of virtual interrupts . We saw one in lguest_user . c :
* the Launcher sending interrupts for virtual devices . The other is the Guest
* timer interrupt .
*
* The Guest uses the LHCALL_SET_CLOCKEVENT hypercall to tell us how long to
* the next timer interrupt ( in nanoseconds ) . We use the high - resolution timer
* infrastructure to set a callback at that time .
*
* 0 means " turn off the clock " . */
2008-01-07 16:05:28 +03:00
void guest_set_clockevent ( struct lg_cpu * cpu , unsigned long delta )
2007-07-19 12:49:23 +04:00
{
ktime_t expires ;
if ( unlikely ( delta = = 0 ) ) {
/* Clock event device is shutting down. */
2008-01-07 16:05:28 +03:00
hrtimer_cancel ( & cpu - > hrt ) ;
2007-07-19 12:49:23 +04:00
return ;
}
2007-10-25 09:02:50 +04:00
/* We use wallclock time here, so the Guest might not be running for
* all the time between now and the timer interrupt it asked for . This
* is almost always the right thing to do . */
2007-07-19 12:49:23 +04:00
expires = ktime_add_ns ( ktime_get_real ( ) , delta ) ;
2008-01-07 16:05:28 +03:00
hrtimer_start ( & cpu - > hrt , expires , HRTIMER_MODE_ABS ) ;
2007-07-19 12:49:23 +04:00
}
2007-10-25 09:02:50 +04:00
/* This is the function called when the Guest's timer expires. */
2007-07-19 12:49:23 +04:00
static enum hrtimer_restart clockdev_fn ( struct hrtimer * timer )
{
2008-01-07 16:05:28 +03:00
struct lg_cpu * cpu = container_of ( timer , struct lg_cpu , hrt ) ;
2007-07-19 12:49:23 +04:00
2007-10-25 09:02:50 +04:00
/* Remember the first interrupt is the timer interrupt. */
2008-01-07 16:05:29 +03:00
set_bit ( 0 , cpu - > irqs_pending ) ;
2009-06-13 08:27:01 +04:00
/* Guest may be stopped or running on another CPU. */
if ( ! wake_up_process ( cpu - > tsk ) )
kick_process ( cpu - > tsk ) ;
2007-07-19 12:49:23 +04:00
return HRTIMER_NORESTART ;
}
2007-10-25 09:02:50 +04:00
/* This sets up the timer for this Guest. */
2008-01-07 16:05:28 +03:00
void init_clockdev ( struct lg_cpu * cpu )
2007-07-19 12:49:23 +04:00
{
2008-01-07 16:05:28 +03:00
hrtimer_init ( & cpu - > hrt , CLOCK_REALTIME , HRTIMER_MODE_ABS ) ;
cpu - > hrt . function = clockdev_fn ;
2007-07-19 12:49:23 +04:00
}