2008-02-15 22:52:48 +03:00
/* KVM paravirtual clock driver. A clocksource implementation
Copyright ( C ) 2008 Glauber de Oliveira Costa , Red Hat Inc .
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin St , Fifth Floor , Boston , MA 02110 - 1301 USA
*/
# include <linux/clocksource.h>
# include <linux/kvm_para.h>
# include <asm/pvclock.h>
# include <asm/msr.h>
# include <asm/apic.h>
# include <linux/percpu.h>
# include <linux/hardirq.h>
# include <linux/memblock.h>
# include <linux/string.h>
# include <asm/x86_init.h>
# include <asm/reboot.h>
2008-02-15 22:52:48 +03:00
static int kvmclock = 1 ;
2010-05-11 20:17:44 +04:00
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME ;
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK ;
2008-02-15 22:52:48 +03:00
/*
 * Early boot parameter handler: turn kvmclock off.
 *
 * @arg: parameter value, unused.
 *
 * Always returns 0.
 */
static int parse_no_kvmclock(char *arg)
{
	kvmclock = 0;

	return 0;
}
early_param(" no-kvmclock ", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
2012-11-28 05:28:48 +04:00
struct pvclock_aligned_vcpu_time_info {
struct pvclock_vcpu_time_info clock ;
} __attribute__ ( ( __aligned__ ( SMP_CACHE_BYTES ) ) ) ;
static struct pvclock_aligned_vcpu_time_info * hv_clock ;
2008-06-03 18:17:32 +04:00
static struct pvclock_wall_clock wall_clock ;
2008-02-15 22:52:48 +03:00
/*
* The wallclock is the time of day when we booted . Since then , some time may
* have elapsed since the hypervisor wrote the data . So we try to account for
* that with system time
*/
2008-05-22 12:37:48 +04:00
static unsigned long kvm_get_wallclock ( void )
2008-02-15 22:52:48 +03:00
{
2008-06-03 18:17:32 +04:00
struct pvclock_vcpu_time_info * vcpu_time ;
2008-02-15 22:52:48 +03:00
struct timespec ts ;
int low , high ;
2012-11-28 05:28:48 +04:00
int cpu ;
2008-02-15 22:52:48 +03:00
2009-08-31 11:04:31 +04:00
low = ( int ) __pa_symbol ( & wall_clock ) ;
high = ( ( u64 ) __pa_symbol ( & wall_clock ) > > 32 ) ;
2010-05-11 20:17:44 +04:00
native_write_msr ( msr_kvm_wall_clock , low , high ) ;
2008-02-15 22:52:48 +03:00
2012-11-28 05:28:48 +04:00
preempt_disable ( ) ;
cpu = smp_processor_id ( ) ;
vcpu_time = & hv_clock [ cpu ] . clock ;
2008-06-03 18:17:32 +04:00
pvclock_read_wallclock ( & wall_clock , vcpu_time , & ts ) ;
2012-11-28 05:28:48 +04:00
preempt_enable ( ) ;
2008-02-15 22:52:48 +03:00
2008-06-03 18:17:32 +04:00
return ts . tv_sec ;
2008-02-15 22:52:48 +03:00
}
2008-05-22 12:37:48 +04:00
/*
 * Setting the wall clock is not supported by this driver.
 *
 * @now: requested time, ignored.
 *
 * Always returns -1.
 */
static int kvm_set_wallclock(unsigned long now)
{
	return -1;
}
/*
 * Read the paravirtual clock of the CPU we are currently running on.
 *
 * Preemption is disabled (using the _notrace variants) around the read so
 * that the CPU number and the hv_clock entry being read stay in sync.
 *
 * Returns the value produced by pvclock_clocksource_read().
 */
static cycle_t kvm_clock_read(void)
{
	struct pvclock_vcpu_time_info *pvti;
	cycle_t cycles;

	preempt_disable_notrace();
	pvti = &hv_clock[smp_processor_id()].clock;
	cycles = pvclock_clocksource_read(pvti);
	preempt_enable_notrace();

	return cycles;
}
2008-06-03 18:17:32 +04:00
2009-04-21 23:24:00 +04:00
/*
 * clocksource ->read callback: forwards to kvm_clock_read().
 *
 * @cs: clocksource being read, unused.
 */
static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
{
	cycle_t cycles = kvm_clock_read();

	return cycles;
}
2008-07-28 18:47:53 +04:00
/*
 * If we don't preset lpj, there is the possibility that the guest
 * will calibrate under heavy load - thus, getting a lower lpj -
 * and later execute the delays themselves without load. This is wrong,
 * because no delay loop may finish before its time has elapsed.
 * Any heuristic is subject to failure, because ultimately, a large
 * pool of guests can be running and disturbing each other. So we preset
 * lpj here.
 */
/*
 * Report the TSC frequency derived from the current CPU's pvclock record.
 *
 * Preemption is disabled while the record is looked up and read so the
 * CPU number stays valid.
 *
 * Returns the value computed by pvclock_tsc_khz().
 */
static unsigned long kvm_get_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *pvti;
	unsigned long khz;

	preempt_disable();
	pvti = &hv_clock[smp_processor_id()].clock;
	khz = pvclock_tsc_khz(pvti);
	preempt_enable();

	return khz;
}
static void kvm_get_preset_lpj ( void )
{
unsigned long khz ;
u64 lpj ;
2008-12-05 23:36:45 +03:00
khz = kvm_get_tsc_khz ( ) ;
2008-07-28 18:47:53 +04:00
lpj = ( ( u64 ) khz * 1000 ) ;
do_div ( lpj , HZ ) ;
preset_lpj = lpj ;
}
2012-03-10 23:37:26 +04:00
bool kvm_check_and_clear_guest_paused ( void )
{
bool ret = false ;
struct pvclock_vcpu_time_info * src ;
2012-11-28 05:28:48 +04:00
int cpu = smp_processor_id ( ) ;
if ( ! hv_clock )
return ret ;
2012-03-10 23:37:26 +04:00
2012-11-28 05:28:48 +04:00
src = & hv_clock [ cpu ] . clock ;
2012-03-10 23:37:26 +04:00
if ( ( src - > flags & PVCLOCK_GUEST_STOPPED ) ! = 0 ) {
2012-11-28 05:28:48 +04:00
src - > flags & = ~ PVCLOCK_GUEST_STOPPED ;
2012-03-10 23:37:26 +04:00
ret = true ;
}
return ret ;
}
2008-02-15 22:52:48 +03:00
static struct clocksource kvm_clock = {
. name = " kvm-clock " ,
2009-04-21 23:24:00 +04:00
. read = kvm_clock_get_cycles ,
2008-02-15 22:52:48 +03:00
. rating = 400 ,
. mask = CLOCKSOURCE_MASK ( 64 ) ,
. flags = CLOCK_SOURCE_IS_CONTINUOUS ,
} ;
2010-10-14 13:22:49 +04:00
int kvm_register_clock ( char * txt )
2008-02-15 22:52:48 +03:00
{
int cpu = smp_processor_id ( ) ;
2010-08-03 01:35:28 +04:00
int low , high , ret ;
2012-11-28 05:28:48 +04:00
struct pvclock_vcpu_time_info * src = & hv_clock [ cpu ] . clock ;
2010-08-03 01:35:28 +04:00
2012-11-28 05:28:48 +04:00
low = ( int ) __pa ( src ) | 1 ;
high = ( ( u64 ) __pa ( src ) > > 32 ) ;
2010-08-03 01:35:28 +04:00
ret = native_write_msr_safe ( msr_kvm_system_time , low , high ) ;
2008-06-03 18:17:32 +04:00
printk ( KERN_INFO " kvm-clock: cpu %d, msr %x:%x, %s \n " ,
cpu , high , low , txt ) ;
2010-05-11 20:17:44 +04:00
2010-08-03 01:35:28 +04:00
return ret ;
2008-02-15 22:52:48 +03:00
}
2012-02-13 17:07:27 +04:00
/* Save hook for the sched clock state: there is nothing to save. */
static void kvm_save_sched_clock_state(void)
{
	/* Intentionally empty. */
}
/*
 * Restore hook for the sched clock state: register the current CPU's
 * clock again. The result of the registration is not checked.
 */
static void kvm_restore_sched_clock_state(void)
{
	kvm_register_clock(" primary cpu clock, resume ");
}
2008-04-30 19:39:05 +04:00
# ifdef CONFIG_X86_LOCAL_APIC
2008-11-22 20:37:44 +03:00
/*
 * Register the clock of a secondary CPU.
 *
 * Now that the first cpu already had this clocksource initialized,
 * we shouldn't fail, so a failure only triggers a warning.
 */
static void __cpuinit kvm_setup_secondary_clock(void)
{
	int err = kvm_register_clock(" secondary cpu clock ");

	WARN_ON(err);
}
2008-04-30 19:39:05 +04:00
# endif
2008-02-15 22:52:48 +03:00
2008-03-17 22:08:40 +03:00
/*
 * After the clock is registered, the host will keep writing to the
 * registered memory location. If the guest happens to shut down, this memory
 * won't be valid. In cases like kexec, in which you install a new kernel, this
 * means a random memory location will keep being written to. So before any
 * kind of shutdown from our side, we unregister the clock by writing a value
 * that does not have the 'enable' bit set to the MSR.
 */
# ifdef CONFIG_KEXEC
static void kvm_crash_shutdown ( struct pt_regs * regs )
{
2010-05-11 20:17:44 +04:00
native_write_msr ( msr_kvm_system_time , 0 , 0 ) ;
2011-07-11 23:28:19 +04:00
kvm_disable_steal_time ( ) ;
2008-03-17 22:08:40 +03:00
native_machine_crash_shutdown ( regs ) ;
}
# endif
static void kvm_shutdown ( void )
{
2010-05-11 20:17:44 +04:00
native_write_msr ( msr_kvm_system_time , 0 , 0 ) ;
2011-07-11 23:28:19 +04:00
kvm_disable_steal_time ( ) ;
2008-03-17 22:08:40 +03:00
native_machine_shutdown ( ) ;
}
2008-02-15 22:52:48 +03:00
/*
 * Detect and set up the KVM paravirtual clock.
 *
 * Returns without doing anything when not running on KVM, when kvmclock
 * was disabled on the command line, or when the host offers neither
 * clocksource feature. Otherwise it:
 *   - selects the MSR pair to use,
 *   - allocates and clears the hv_clock array (NR_CPUS entries),
 *   - registers the boot CPU's clock (undoing the allocation on failure),
 *   - installs the kvmclock callbacks, presets lpj and registers the
 *     clocksource.
 */
void __init kvmclock_init(void)
{
	unsigned long mem;
	unsigned long size;

	if (!kvm_para_available())
		return;

	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
		msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
		msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
	} else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) {
		return;
	}

	printk(KERN_INFO " kvm-clock: Using msrs %x and %x ",
	       msr_kvm_system_time, msr_kvm_wall_clock);

	size = sizeof(struct pvclock_aligned_vcpu_time_info) * NR_CPUS;
	mem = memblock_alloc(size, PAGE_SIZE);
	if (!mem)
		return;
	hv_clock = __va(mem);

	/*
	 * The physical-address memblock_alloc() used here does not clear the
	 * memory. Only the boot CPU's entry is registered below; the other
	 * entries are registered later, so start them out zeroed instead of
	 * leaving stale data in records that may be read before then.
	 */
	memset(hv_clock, 0, size);

	if (kvm_register_clock(" boot clock ")) {
		/* Registration failed: give the memory back. */
		hv_clock = NULL;
		memblock_free(mem, size);
		return;
	}

	pv_time_ops.sched_clock = kvm_clock_read;
	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
	x86_platform.get_wallclock = kvm_get_wallclock;
	x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
	x86_cpuinit.early_percpu_clock_init =
		kvm_setup_secondary_clock;
#endif
	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
	machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
	machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
	kvm_get_preset_lpj();
	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
	pv_info.paravirt_enabled = 1;
	pv_info.name = " KVM ";

	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}