2007-07-26 21:41:02 +04:00
/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
 * controls and communicates with the Guest.  For example, the first write will
 * tell us the Guest's memory layout, pagetable, entry point and kernel address
 * offset.  A read will run the Guest until something happens, such as a signal
 * or the Guest doing a NOTIFY out to the Launcher. :*/
2007-07-19 12:49:23 +04:00
# include <linux/uaccess.h>
# include <linux/miscdevice.h>
# include <linux/fs.h>
# include "lg.h"
2007-10-25 09:02:50 +04:00
/*L:055 When something happens, the Waker process needs a way to stop the
* kernel running the Guest and return to the Launcher . So the Waker writes
* LHREQ_BREAK and the value " 1 " to / dev / lguest to do this . Once the Launcher
* has done whatever needs attention , it writes LHREQ_BREAK and " 0 " to release
* the Waker . */
2008-01-07 16:05:34 +03:00
static int break_guest_out ( struct lg_cpu * cpu , const unsigned long __user * input )
2007-07-19 12:49:23 +04:00
{
unsigned long on ;
2007-10-25 09:02:50 +04:00
/* Fetch whether they're turning break on or off. */
2007-07-19 12:49:23 +04:00
if ( get_user ( on , input ) ! = 0 )
return - EFAULT ;
if ( on ) {
2008-01-07 16:05:34 +03:00
cpu - > break_out = 1 ;
2007-10-25 09:02:50 +04:00
/* Pop it out of the Guest (may be running on different CPU) */
2008-01-07 16:05:34 +03:00
wake_up_process ( cpu - > tsk ) ;
2007-07-19 12:49:23 +04:00
/* Wait for them to reset it */
2008-01-07 16:05:34 +03:00
return wait_event_interruptible ( cpu - > break_wq , ! cpu - > break_out ) ;
2007-07-19 12:49:23 +04:00
} else {
2008-01-07 16:05:34 +03:00
cpu - > break_out = 0 ;
wake_up ( & cpu - > break_wq ) ;
2007-07-19 12:49:23 +04:00
return 0 ;
}
}
2007-07-26 21:41:03 +04:00
/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
* number to / dev / lguest . */
2008-01-07 16:05:29 +03:00
static int user_send_irq ( struct lg_cpu * cpu , const unsigned long __user * input )
2007-07-19 12:49:23 +04:00
{
2007-10-22 05:03:31 +04:00
unsigned long irq ;
2007-07-19 12:49:23 +04:00
if ( get_user ( irq , input ) ! = 0 )
return - EFAULT ;
if ( irq > = LGUEST_IRQS )
return - EINVAL ;
2007-07-26 21:41:03 +04:00
/* Next time the Guest runs, the core code will see if it can deliver
* this interrupt . */
2008-01-07 16:05:29 +03:00
set_bit ( irq , cpu - > irqs_pending ) ;
2007-07-19 12:49:23 +04:00
return 0 ;
}
2007-07-26 21:41:03 +04:00
/*L:040 Once our Guest is initialized, the Launcher makes it run by reading
* from / dev / lguest . */
2007-07-19 12:49:23 +04:00
static ssize_t read ( struct file * file , char __user * user , size_t size , loff_t * o )
{
struct lguest * lg = file - > private_data ;
2008-01-07 16:05:25 +03:00
struct lg_cpu * cpu ;
unsigned int cpu_id = * o ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:03 +04:00
/* You must write LHREQ_INITIALIZE first! */
2007-07-19 12:49:23 +04:00
if ( ! lg )
return - EINVAL ;
2008-01-07 16:05:25 +03:00
/* Watch out for arbitrary vcpu indexes! */
if ( cpu_id > = lg - > nr_cpus )
return - EINVAL ;
cpu = & lg - > cpus [ cpu_id ] ;
2007-10-25 09:02:50 +04:00
/* If you're not the task which owns the Guest, go away. */
2008-01-07 16:05:34 +03:00
if ( current ! = cpu - > tsk )
2007-07-19 12:49:23 +04:00
return - EPERM ;
2007-07-26 21:41:03 +04:00
/* If the guest is already dead, we indicate why */
2007-07-19 12:49:23 +04:00
if ( lg - > dead ) {
size_t len ;
2007-07-26 21:41:03 +04:00
/* lg->dead either contains an error code, or a string. */
2007-07-19 12:49:23 +04:00
if ( IS_ERR ( lg - > dead ) )
return PTR_ERR ( lg - > dead ) ;
2007-07-26 21:41:03 +04:00
/* We can only return as much as the buffer they read with. */
2007-07-19 12:49:23 +04:00
len = min ( size , strlen ( lg - > dead ) + 1 ) ;
if ( copy_to_user ( user , lg - > dead , len ) ! = 0 )
return - EFAULT ;
return len ;
}
2007-10-22 05:24:10 +04:00
/* If we returned from read() last time because the Guest notified,
2007-07-26 21:41:03 +04:00
* clear the flag . */
2008-01-07 16:05:36 +03:00
if ( cpu - > pending_notify )
cpu - > pending_notify = 0 ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:03 +04:00
/* Run the Guest until something interesting happens. */
2008-01-07 16:05:25 +03:00
return run_guest ( cpu , ( unsigned long __user * ) user ) ;
2007-07-19 12:49:23 +04:00
}
2008-01-07 16:05:24 +03:00
static int lg_cpu_start ( struct lg_cpu * cpu , unsigned id , unsigned long start_ip )
{
if ( id > = NR_CPUS )
return - EINVAL ;
cpu - > id = id ;
cpu - > lg = container_of ( ( cpu - id ) , struct lguest , cpus [ 0 ] ) ;
cpu - > lg - > nr_cpus + + ;
2008-01-07 16:05:28 +03:00
init_clockdev ( cpu ) ;
2008-01-07 16:05:24 +03:00
2008-01-07 16:05:32 +03:00
/* We need a complete page for the Guest registers: they are accessible
* to the Guest and we can only grant it access to whole pages . */
cpu - > regs_page = get_zeroed_page ( GFP_KERNEL ) ;
if ( ! cpu - > regs_page )
return - ENOMEM ;
/* We actually put the registers at the bottom of the page. */
cpu - > regs = ( void * ) cpu - > regs_page + PAGE_SIZE - sizeof ( * cpu - > regs ) ;
/* Now we initialize the Guest's registers, handing it the start
* address . */
lguest_arch_setup_regs ( cpu , start_ip ) ;
2008-01-07 16:05:34 +03:00
/* Initialize the queue for the waker to wait on */
init_waitqueue_head ( & cpu - > break_wq ) ;
/* We keep a pointer to the Launcher task (ie. current task) for when
* other Guests want to wake this one ( inter - Guest I / O ) . */
cpu - > tsk = current ;
/* We need to keep a pointer to the Launcher's memory map, because if
* the Launcher dies we need to clean it up . If we don ' t keep a
* reference , it is destroyed before close ( ) is called . */
cpu - > mm = get_task_mm ( cpu - > tsk ) ;
2008-01-18 00:13:26 +03:00
/* We remember which CPU's pages this Guest used last, for optimization
* when the same Guest runs on the same CPU twice . */
cpu - > last_pages = NULL ;
2008-01-07 16:05:24 +03:00
return 0 ;
}
2007-10-22 05:03:36 +04:00
/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit)
2007-10-22 05:03:31 +04:00
* values ( in addition to the LHREQ_INITIALIZE value ) . These are :
2007-07-26 21:41:03 +04:00
*
2007-10-22 05:03:26 +04:00
* base : The start of the Guest - physical memory inside the Launcher memory .
*
2007-07-26 21:41:03 +04:00
* pfnlimit : The highest ( Guest - physical ) page number the Guest should be
2007-10-25 09:02:50 +04:00
* allowed to access . The Guest memory lives inside the Launcher , so it sets
* this to ensure the Guest can only reach its own memory .
2007-07-26 21:41:03 +04:00
*
* pgdir : The ( Guest - physical ) address of the top of the initial Guest
* pagetables ( which are set up by the Launcher ) .
*
* start : The first instruction to execute ( " eip " in x86 - speak ) .
*/
2007-10-22 05:03:31 +04:00
static int initialize ( struct file * file , const unsigned long __user * input )
2007-07-19 12:49:23 +04:00
{
2007-07-26 21:41:03 +04:00
/* "struct lguest" contains everything we (the Host) know about a
* Guest . */
2007-07-19 12:49:23 +04:00
struct lguest * lg ;
2007-10-22 05:03:27 +04:00
int err ;
2007-10-22 05:03:36 +04:00
unsigned long args [ 4 ] ;
2007-07-19 12:49:23 +04:00
2007-10-22 05:03:27 +04:00
/* We grab the Big Lguest lock, which protects against multiple
* simultaneous initializations . */
2007-07-19 12:49:23 +04:00
mutex_lock ( & lguest_lock ) ;
2007-07-26 21:41:03 +04:00
/* You can't initialize twice! Close the device and start again... */
2007-07-19 12:49:23 +04:00
if ( file - > private_data ) {
err = - EBUSY ;
goto unlock ;
}
if ( copy_from_user ( args , input , sizeof ( args ) ) ! = 0 ) {
err = - EFAULT ;
goto unlock ;
}
2007-10-22 05:03:27 +04:00
lg = kzalloc ( sizeof ( * lg ) , GFP_KERNEL ) ;
if ( ! lg ) {
err = - ENOMEM ;
2007-07-19 12:49:23 +04:00
goto unlock ;
}
2007-07-26 21:41:03 +04:00
/* Populate the easy fields of our "struct lguest" */
2007-10-22 05:03:26 +04:00
lg - > mem_base = ( void __user * ) ( long ) args [ 0 ] ;
lg - > pfn_limit = args [ 1 ] ;
2007-07-26 21:41:03 +04:00
2008-01-07 16:05:24 +03:00
/* This is the first cpu */
2008-01-07 16:05:25 +03:00
err = lg_cpu_start ( & lg - > cpus [ 0 ] , 0 , args [ 3 ] ) ;
2008-01-07 16:05:24 +03:00
if ( err )
goto release_guest ;
2007-07-26 21:41:03 +04:00
/* Initialize the Guest's shadow page tables, using the toplevel
* address the Launcher gave us . This allocates memory , so can
* fail . */
2007-10-22 05:03:26 +04:00
err = init_guest_pagetable ( lg , args [ 2 ] ) ;
2007-07-19 12:49:23 +04:00
if ( err )
goto free_regs ;
2007-07-26 21:41:03 +04:00
/* We keep our "struct lguest" in the file's private_data. */
2007-07-19 12:49:23 +04:00
file - > private_data = lg ;
mutex_unlock ( & lguest_lock ) ;
2007-07-26 21:41:03 +04:00
/* And because this is a write() call, we return the length used. */
2007-07-19 12:49:23 +04:00
return sizeof ( args ) ;
free_regs :
2008-01-07 16:05:32 +03:00
/* FIXME: This should be in free_vcpu */
free_page ( lg - > cpus [ 0 ] . regs_page ) ;
2007-07-19 12:49:23 +04:00
release_guest :
2007-11-15 03:59:00 +03:00
kfree ( lg ) ;
2007-07-19 12:49:23 +04:00
unlock :
mutex_unlock ( & lguest_lock ) ;
return err ;
}
2007-07-26 21:41:03 +04:00
/*L:010 The first operation the Launcher does must be a write. All writes
2007-10-25 09:02:50 +04:00
* start with an unsigned long number : for the first write this must be
2007-07-26 21:41:03 +04:00
* LHREQ_INITIALIZE to set up the Guest . After that the Launcher can use
2007-10-22 05:24:10 +04:00
* writes of other values to send interrupts . */
2007-10-22 05:03:31 +04:00
static ssize_t write ( struct file * file , const char __user * in ,
2007-07-19 12:49:23 +04:00
size_t size , loff_t * off )
{
2007-07-26 21:41:03 +04:00
/* Once the guest is initialized, we hold the "struct lguest" in the
* file private data . */
2007-07-19 12:49:23 +04:00
struct lguest * lg = file - > private_data ;
2007-10-22 05:03:31 +04:00
const unsigned long __user * input = ( const unsigned long __user * ) in ;
unsigned long req ;
2008-01-07 16:05:29 +03:00
struct lg_cpu * uninitialized_var ( cpu ) ;
2008-01-07 16:05:26 +03:00
unsigned int cpu_id = * off ;
2007-07-19 12:49:23 +04:00
if ( get_user ( req , input ) ! = 0 )
return - EFAULT ;
2007-10-22 05:03:31 +04:00
input + + ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:03 +04:00
/* If you haven't initialized, you must do that first. */
2008-01-07 16:05:26 +03:00
if ( req ! = LHREQ_INITIALIZE ) {
if ( ! lg | | ( cpu_id > = lg - > nr_cpus ) )
return - EINVAL ;
cpu = & lg - > cpus [ cpu_id ] ;
if ( ! cpu )
return - EINVAL ;
}
2007-07-26 21:41:03 +04:00
/* Once the Guest is dead, all you can do is read() why it died. */
2007-07-19 12:49:23 +04:00
if ( lg & & lg - > dead )
return - ENOENT ;
/* If you're not the task which owns the Guest, you can only break */
2008-01-07 16:05:34 +03:00
if ( lg & & current ! = cpu - > tsk & & req ! = LHREQ_BREAK )
2007-07-19 12:49:23 +04:00
return - EPERM ;
switch ( req ) {
case LHREQ_INITIALIZE :
2007-10-22 05:03:31 +04:00
return initialize ( file , input ) ;
2007-07-19 12:49:23 +04:00
case LHREQ_IRQ :
2008-01-07 16:05:29 +03:00
return user_send_irq ( cpu , input ) ;
2007-07-19 12:49:23 +04:00
case LHREQ_BREAK :
2008-01-07 16:05:34 +03:00
return break_guest_out ( cpu , input ) ;
2007-07-19 12:49:23 +04:00
default :
return - EINVAL ;
}
}
2007-07-26 21:41:03 +04:00
/*L:060 The final piece of interface code is the close() routine. It reverses
* everything done in initialize ( ) . This is usually called because the
* Launcher exited .
*
* Note that the close routine returns 0 or a negative error number : it can ' t
* really fail , but it can whine . I blame Sun for this wart , and K & R C for
* letting them do it . : */
2007-07-19 12:49:23 +04:00
static int close ( struct inode * inode , struct file * file )
{
struct lguest * lg = file - > private_data ;
2008-01-07 16:05:28 +03:00
unsigned int i ;
2007-07-19 12:49:23 +04:00
2007-07-26 21:41:03 +04:00
/* If we never successfully initialized, there's nothing to clean up */
2007-07-19 12:49:23 +04:00
if ( ! lg )
return 0 ;
2007-07-26 21:41:03 +04:00
/* We need the big lock, to protect from inter-guest I/O and other
* Launchers initializing guests . */
2007-07-19 12:49:23 +04:00
mutex_lock ( & lguest_lock ) ;
2008-01-07 16:05:34 +03:00
/* Free up the shadow page tables for the Guest. */
free_guest_pagetable ( lg ) ;
2008-01-07 16:05:32 +03:00
for ( i = 0 ; i < lg - > nr_cpus ; i + + ) {
2008-01-07 16:05:28 +03:00
/* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
hrtimer_cancel ( & lg - > cpus [ i ] . hrt ) ;
2008-01-07 16:05:32 +03:00
/* We can free up the register page we allocated. */
free_page ( lg - > cpus [ i ] . regs_page ) ;
2008-01-07 16:05:34 +03:00
/* Now all the memory cleanups are done, it's safe to release
* the Launcher ' s memory management structure . */
mmput ( lg - > cpus [ i ] . mm ) ;
2008-01-07 16:05:32 +03:00
}
2007-07-26 21:41:03 +04:00
/* If lg->dead doesn't contain an error code it will be NULL or a
* kmalloc ( ) ed string , either of which is ok to hand to kfree ( ) . */
2007-07-19 12:49:23 +04:00
if ( ! IS_ERR ( lg - > dead ) )
kfree ( lg - > dead ) ;
2007-07-26 21:41:03 +04:00
/* We clear the entire structure, which also marks it as free for the
* next user . */
2007-07-19 12:49:23 +04:00
memset ( lg , 0 , sizeof ( * lg ) ) ;
2007-07-26 21:41:03 +04:00
/* Release lock and exit. */
2007-07-19 12:49:23 +04:00
mutex_unlock ( & lguest_lock ) ;
2007-07-26 21:41:03 +04:00
2007-07-19 12:49:23 +04:00
return 0 ;
}
2007-07-26 21:41:03 +04:00
/*L:000
 * Welcome to our journey through the Launcher!
 *
 * The Launcher is the Host userspace program which sets up, runs and services
 * the Guest.  In fact, many comments in the Drivers which refer to "the Host"
 * doing things are inaccurate: the Launcher does all the device handling for
 * the Guest, but the Guest can't know that.
 *
 * Just to confuse you: to the Host kernel, the Launcher *is* the Guest and we
 * shall see more of that later.
 *
 * We begin our understanding with the Host kernel interface which the Launcher
 * uses: reading and writing a character device called /dev/lguest.  All the
 * work happens in the read(), write() and close() routines: */
2007-07-19 12:49:23 +04:00
/* File operations for the lguest device: read() and write() are the entry
 * points defined in this file, and release() maps to our close(). */
static struct file_operations lguest_fops = {
	.owner   = THIS_MODULE,
	.read    = read,
	.write   = write,
	.release = close,
};
2007-07-26 21:41:03 +04:00
/* This is a textbook example of a "misc" character device. Populate a "struct
* miscdevice " and register it with misc_register(). */
2007-07-19 12:49:23 +04:00
static struct miscdevice lguest_dev = {
. minor = MISC_DYNAMIC_MINOR ,
. name = " lguest " ,
. fops = & lguest_fops ,
} ;
/* Register the lguest misc device and pass misc_register()'s status straight
 * back to the caller. */
int __init lguest_device_init(void)
{
	int rc = misc_register(&lguest_dev);

	return rc;
}
/* Undo lguest_device_init(): deregister the lguest misc device. */
void __exit lguest_device_remove(void)
{
	struct miscdevice *dev = &lguest_dev;

	misc_deregister(dev);
}