2007-07-09 18:51:59 +02:00
# include <linux/sched.h>
2006-06-26 00:25:12 -07:00
# include <linux/clocksource.h>
2006-06-26 00:25:10 -07:00
# include <linux/workqueue.h>
# include <linux/cpufreq.h>
# include <linux/jiffies.h>
# include <linux/init.h>
2006-06-26 00:25:12 -07:00
# include <linux/dmi.h>
2008-01-30 13:30:06 +01:00
# include <linux/percpu.h>
2006-06-26 00:25:10 -07:00
2006-06-26 00:25:12 -07:00
# include <asm/delay.h>
2006-06-26 00:25:10 -07:00
# include <asm/tsc.h>
# include <asm/io.h>
2007-03-05 00:30:35 -08:00
# include <asm/timer.h>
2006-06-26 00:25:10 -07:00
# include "mach_timer.h"
2007-03-24 23:02:49 +01:00
/*
 * Set to 1 by tsc_init() when the TSC was calibrated and is not known to
 * be unstable; cleared again by mark_tsc_unstable().
 */
static int tsc_enabled ;
2006-06-26 00:25:10 -07:00
/*
 * TSC frequency.  It is stored separately from the CPU frequency because
 * on some systems the TSC rate does not change with the CPU frequency.
 */
unsigned int tsc_khz;
EXPORT_SYMBOL_GPL(tsc_khz);
2006-06-26 00:25:10 -07:00
# ifdef CONFIG_X86_TSC
/*
 * "notsc" boot option handler for kernels built with CONFIG_X86_TSC.
 *
 * Such a kernel cannot disable the TSC completely, so the handler only
 * prints a warning and marks the TSC unstable.
 *
 * @str: option argument (not used).
 *
 * Returns 1.
 */
static int __init tsc_setup(char *str)
{
	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
				"cannot disable TSC completely.\n");
	mark_tsc_unstable("user disabled TSC");

	return 1;
}
# else
/*
 * "notsc" boot option handler when CONFIG_X86_TSC is not set.
 *
 * Acts as the disable flag for the TSC: it takes effect by clearing the
 * TSC cpu flag (see cpu/common.c).
 *
 * @str: option argument (not used).
 *
 * Returns 1.
 */
static int __init tsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);

	return 1;
}
# endif
/* Route the "notsc" boot option to tsc_setup(). */
__setup("notsc", tsc_setup);
/*
 * Code to mark and check if the TSC is unstable due to cpufreq or due to
 * unsynced TSCs.
 *
 * tsc_unstable is non-zero once the TSC has been flagged unstable; it is
 * reported to callers by check_tsc_unstable().
 */
static int tsc_unstable ;
2007-07-19 01:49:23 -07:00
int check_tsc_unstable ( void )
2006-06-26 00:25:10 -07:00
{
return tsc_unstable ;
}
2007-07-19 01:49:23 -07:00
EXPORT_SYMBOL_GPL ( check_tsc_unstable ) ;
2006-06-26 00:25:10 -07:00
2007-10-20 01:13:56 +02:00
/* Accelerators for sched_clock()
 * convert from cycles (64 bits) => nanoseconds (64 bits)
 *  basic equation:
 *		ns = cycles / (freq / ns_per_sec)
 *		ns = cycles * (ns_per_sec / freq)
 *		ns = cycles * (10^9 / (cpu_khz * 10^3))
 *		ns = cycles * (10^6 / cpu_khz)
 *
 *	Then we use scaling math (suggested by george@mvista.com) to get:
 *		ns = cycles * (10^6 * SC / cpu_khz) / SC
 *		ns = cycles * cyc2ns_scale / SC
 *
 *	And since SC is a constant power of two, we can convert the div
 *	into a shift.
 *
 *	We can use khz divisor instead of mhz to keep a better precision, since
 *	cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 *	(mathieu.desnoyers@polymtl.ca)
 *
 *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
2008-01-30 13:30:06 +01:00
/*
 * Per-CPU cycles-to-nanoseconds scale factor.  set_cyc2ns_scale() stores
 * (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz here.
 */
DEFINE_PER_CPU ( unsigned long , cyc2ns ) ;
2006-06-26 00:25:10 -07:00
2008-01-30 13:30:06 +01:00
static void set_cyc2ns_scale ( unsigned long cpu_khz , int cpu )
2006-06-26 00:25:10 -07:00
{
2008-01-30 13:30:06 +01:00
unsigned long long tsc_now , ns_now ;
2008-03-31 14:52:15 +02:00
unsigned long flags , * scale ;
2008-01-30 13:30:06 +01:00
local_irq_save ( flags ) ;
sched_clock_idle_sleep_event ( ) ;
scale = & per_cpu ( cyc2ns , cpu ) ;
rdtscll ( tsc_now ) ;
ns_now = __cycles_2_ns ( tsc_now ) ;
if ( cpu_khz )
* scale = ( NSEC_PER_MSEC < < CYC2NS_SCALE_FACTOR ) / cpu_khz ;
/*
* Start smoothly with the new frequency :
*/
sched_clock_idle_wakeup_event ( 0 ) ;
local_irq_restore ( flags ) ;
2006-06-26 00:25:10 -07:00
}
/*
* Scheduler clock - returns current time in nanosec units .
*/
2007-07-17 18:37:04 -07:00
unsigned long long native_sched_clock ( void )
2006-06-26 00:25:10 -07:00
{
unsigned long long this_offset ;
/*
2007-02-13 13:26:22 +01:00
* Fall back to jiffies if there ' s no TSC available :
2007-07-09 18:51:59 +02:00
* ( But note that we still use it if the TSC is marked
* unstable . We do this because unlike Time Of Day ,
* the scheduler clock tolerates small errors and it ' s
* very important for it to be as fast as the platform
* can achive it . )
2006-06-26 00:25:10 -07:00
*/
2007-07-09 18:51:59 +02:00
if ( unlikely ( ! tsc_enabled & & ! tsc_unstable ) )
2007-02-13 13:26:22 +01:00
/* No locking but a rare wrong value is not a big deal: */
2006-06-26 00:25:10 -07:00
return ( jiffies_64 - INITIAL_JIFFIES ) * ( 1000000000 / HZ ) ;
/* read the Time Stamp Counter: */
2007-07-17 18:37:04 -07:00
rdtscll ( this_offset ) ;
2006-06-26 00:25:10 -07:00
/* return the value in ns */
return cycles_2_ns ( this_offset ) ;
}
2007-07-17 18:37:04 -07:00
/*
 * We need to define a real function for sched_clock, to override the
 * weak default version.
 *
 * With CONFIG_PARAVIRT the call is forwarded to paravirt_sched_clock();
 * otherwise sched_clock is an alias of native_sched_clock.
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
2007-03-05 00:30:36 -08:00
unsigned long native_calculate_cpu_khz ( void )
2006-06-26 00:25:10 -07:00
{
unsigned long long start , end ;
unsigned long count ;
2007-10-23 22:37:22 +02:00
u64 delta64 = ( u64 ) ULLONG_MAX ;
2006-06-26 00:25:10 -07:00
int i ;
unsigned long flags ;
local_irq_save ( flags ) ;
2007-10-23 22:37:22 +02:00
/* run 3 times to ensure the cache is warm and to get an accurate reading */
2006-06-26 00:25:10 -07:00
for ( i = 0 ; i < 3 ; i + + ) {
mach_prepare_counter ( ) ;
rdtscll ( start ) ;
mach_countup ( & count ) ;
rdtscll ( end ) ;
2007-10-23 22:37:22 +02:00
/*
* Error : ECTCNEVERSET
* The CTC wasn ' t reliable : we got a hit on the very first read ,
* or the CPU was so fast / slow that the quotient wouldn ' t fit in
* 32 bits . .
*/
if ( count < = 1 )
continue ;
/* cpu freq too slow: */
if ( ( end - start ) < = CALIBRATE_TIME_MSEC )
continue ;
/*
* We want the minimum time of all runs in case one of them
* is inaccurate due to SMI or other delay
*/
2007-10-23 22:37:22 +02:00
delta64 = min ( delta64 , ( end - start ) ) ;
2006-06-26 00:25:10 -07:00
}
2007-10-23 22:37:22 +02:00
/* cpu freq too fast (or every run was bad): */
2006-06-26 00:25:10 -07:00
if ( delta64 > ( 1ULL < < 32 ) )
goto err ;
delta64 + = CALIBRATE_TIME_MSEC / 2 ; /* round for do_div */
do_div ( delta64 , CALIBRATE_TIME_MSEC ) ;
local_irq_restore ( flags ) ;
return ( unsigned long ) delta64 ;
err :
local_irq_restore ( flags ) ;
return 0 ;
}
int recalibrate_cpu_khz ( void )
{
# ifndef CONFIG_SMP
unsigned long cpu_khz_old = cpu_khz ;
if ( cpu_has_tsc ) {
cpu_khz = calculate_cpu_khz ( ) ;
tsc_khz = cpu_khz ;
2007-10-19 20:35:04 +02:00
cpu_data ( 0 ) . loops_per_jiffy =
cpufreq_scale ( cpu_data ( 0 ) . loops_per_jiffy ,
2006-06-26 00:25:10 -07:00
cpu_khz_old , cpu_khz ) ;
return 0 ;
} else
return - ENODEV ;
# else
return - ENODEV ;
# endif
}
EXPORT_SYMBOL ( recalibrate_cpu_khz ) ;
# ifdef CONFIG_CPU_FREQ
/*
 * If the CPU frequency is scaled, TSC-based delays will need a different
 * loops_per_jiffy value to function properly.
 *
 * The values below are the reference point recorded by
 * time_cpufreq_notifier() and used as the base for later rescaling.
 */
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long cpu_khz_ref;
2006-06-26 00:25:10 -07:00
static int
time_cpufreq_notifier ( struct notifier_block * nb , unsigned long val , void * data )
{
struct cpufreq_freqs * freq = data ;
if ( ! ref_freq ) {
if ( ! freq - > old ) {
ref_freq = freq - > new ;
2007-05-02 19:27:18 +02:00
return 0 ;
2006-06-26 00:25:10 -07:00
}
ref_freq = freq - > old ;
2007-10-19 20:35:04 +02:00
loops_per_jiffy_ref = cpu_data ( freq - > cpu ) . loops_per_jiffy ;
2006-06-26 00:25:10 -07:00
cpu_khz_ref = cpu_khz ;
}
if ( ( val = = CPUFREQ_PRECHANGE & & freq - > old < freq - > new ) | |
( val = = CPUFREQ_POSTCHANGE & & freq - > old > freq - > new ) | |
( val = = CPUFREQ_RESUMECHANGE ) ) {
if ( ! ( freq - > flags & CPUFREQ_CONST_LOOPS ) )
2007-10-19 20:35:04 +02:00
cpu_data ( freq - > cpu ) . loops_per_jiffy =
2006-06-26 00:25:10 -07:00
cpufreq_scale ( loops_per_jiffy_ref ,
ref_freq , freq - > new ) ;
if ( cpu_khz ) {
if ( num_online_cpus ( ) = = 1 )
cpu_khz = cpufreq_scale ( cpu_khz_ref ,
ref_freq , freq - > new ) ;
if ( ! ( freq - > flags & CPUFREQ_CONST_LOOPS ) ) {
tsc_khz = cpu_khz ;
2008-04-07 12:14:45 +02:00
set_cyc2ns_scale ( cpu_khz , freq - > cpu ) ;
2006-06-26 00:25:10 -07:00
/*
* TSC based sched_clock turns
* to junk w / cpufreq
*/
2007-05-02 19:27:08 +02:00
mark_tsc_unstable ( " cpufreq changes " ) ;
2006-06-26 00:25:10 -07:00
}
}
}
return 0 ;
}
/* Notifier block that hooks time_cpufreq_notifier() into cpufreq. */
static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call	= time_cpufreq_notifier,
};
/*
 * Register time_cpufreq_notifier_block as a cpufreq transition notifier.
 *
 * Returns whatever cpufreq_register_notifier() returns.  Runs as a
 * core_initcall.
 */
static int __init cpufreq_tsc(void)
{
	int ret;

	ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);

	return ret;
}
core_initcall(cpufreq_tsc);
# endif
2006-06-26 00:25:12 -07:00
/* clock source code */
2008-02-19 11:02:30 +01:00
static unsigned long current_tsc_khz ;
2008-04-01 19:45:18 +02:00
static struct clocksource clocksource_tsc ;
2006-06-26 00:25:12 -07:00
2008-04-01 19:45:18 +02:00
/*
* We compare the TSC to the cycle_last value in the clocksource
* structure to avoid a nasty time - warp issue . This can be observed in
* a very small window right after one CPU updated cycle_last under
* xtime lock and the other CPU reads a TSC value which is smaller
* than the cycle_last reference value due to a TSC which is slighty
* behind . This delta is nowhere else observable , but in that case it
* results in a forward time jump in the range of hours due to the
* unsigned delta calculation of the time keeping core code , which is
* necessary to support wrapping clocksources like pm timer .
*/
2006-06-26 00:25:12 -07:00
static cycle_t read_tsc ( void )
{
cycle_t ret ;
rdtscll ( ret ) ;
2008-04-01 19:45:18 +02:00
return ret > = clocksource_tsc . cycle_last ?
ret : clocksource_tsc . cycle_last ;
2006-06-26 00:25:12 -07:00
}
static struct clocksource clocksource_tsc = {
. name = " tsc " ,
. rating = 300 ,
. read = read_tsc ,
2006-06-26 00:25:15 -07:00
. mask = CLOCKSOURCE_MASK ( 64 ) ,
2006-06-26 00:25:12 -07:00
. mult = 0 , /* to be set */
. shift = 22 ,
2007-02-16 01:27:36 -08:00
. flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY ,
2006-06-26 00:25:12 -07:00
} ;
2007-05-02 19:27:08 +02:00
void mark_tsc_unstable ( char * reason )
2006-06-26 00:25:12 -07:00
{
2007-02-16 01:27:42 -08:00
if ( ! tsc_unstable ) {
tsc_unstable = 1 ;
2007-03-24 23:02:49 +01:00
tsc_enabled = 0 ;
2007-05-02 19:27:08 +02:00
printk ( " Marking TSC unstable due to: %s. \n " , reason ) ;
2007-02-16 01:27:42 -08:00
/* Can be called before registration */
if ( clocksource_tsc . mult )
clocksource_change_rating ( & clocksource_tsc , 0 ) ;
else
clocksource_tsc . rating = 0 ;
2006-06-26 00:25:12 -07:00
}
}
2007-02-16 01:27:42 -08:00
EXPORT_SYMBOL_GPL ( mark_tsc_unstable ) ;
2006-06-26 00:25:12 -07:00
2007-10-03 15:15:40 -04:00
/*
 * DMI match callback: log the matched system and set tsc_unstable.
 *
 * @d: matching entry of the DMI table; its ident is printed.
 *
 * Returns 0.
 */
static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
{
	printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
	       d->ident);
	tsc_unstable = 1;

	return 0;
}
/*
 * List of systems that have known TSC problems.  A match invokes
 * dmi_mark_tsc_unstable(); the empty entry terminates the table.
 */
static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
	{
		.callback	= dmi_mark_tsc_unstable,
		.ident		= "IBM Thinkpad 380XD",
		.matches	= {
			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
			DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
		},
	},
	{}
};
/*
* Make an educated guess if the TSC is trustworthy and synchronized
* over all CPUs .
*/
2007-02-16 01:27:34 -08:00
__cpuinit int unsynchronized_tsc ( void )
2006-06-26 00:25:12 -07:00
{
2007-02-16 01:27:34 -08:00
if ( ! cpu_has_tsc | | tsc_unstable )
return 1 ;
2008-01-30 13:32:40 +01:00
/* Anything with constant TSC should be synchronized */
if ( boot_cpu_has ( X86_FEATURE_CONSTANT_TSC ) )
return 0 ;
2006-06-26 00:25:12 -07:00
/*
* Intel systems are normally all synchronized .
* Exceptions must mark TSC as unstable :
*/
2007-02-16 01:27:42 -08:00
if ( boot_cpu_data . x86_vendor ! = X86_VENDOR_INTEL ) {
/* assume multi socket systems are not synchronized: */
if ( num_possible_cpus ( ) > 1 )
tsc_unstable = 1 ;
}
return tsc_unstable ;
2006-06-26 00:25:12 -07:00
}
2007-02-16 01:27:44 -08:00
/*
 * Geode_LX - the OLPC CPU possibly has a very reliable TSC
 */
# ifdef CONFIG_MGEODE_LX
/* RTSC counts during suspend */
# define RTSC_SUSP 0x100
/*
 * Drop the CLOCK_SOURCE_MUST_VERIFY requirement from the TSC clocksource
 * when the RTSC_SUSP bit is set in MSR_GEODE_BUSCONT_CONF0.
 *
 * The MSR is read with rdmsr_safe().  If that read reports an error the
 * output values are not trusted and the clocksource flags are left as
 * they are.
 */
static void __init check_geode_tsc_reliable(void)
{
	unsigned long res_low, res_high;

	if (rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high))
		return;

	if (res_low & RTSC_SUSP)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}
# else
/* Without CONFIG_MGEODE_LX there is nothing to check. */
static inline void check_geode_tsc_reliable(void)
{
}
# endif
2007-03-05 00:30:50 -08:00
/*
 * Calibrate the TSC and register it as a clocksource.
 *
 * Does nothing when the CPU has no TSC.  When calibration yields 0 the TSC
 * is marked unstable and nothing is registered.  Otherwise the cyc2ns
 * scale of every possible CPU is set up, TSC-based delays are enabled and
 * the clocksource is registered, with rating 0 and without
 * CLOCK_SOURCE_IS_CONTINUOUS if the TSC is already known to be unstable.
 */
void __init tsc_init(void)
{
	int cpu;

	if (!cpu_has_tsc)
		return;

	cpu_khz = calculate_cpu_khz();
	tsc_khz = cpu_khz;

	if (!cpu_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		return;
	}

	printk("Detected %lu.%03lu MHz processor.\n",
				(unsigned long)cpu_khz / 1000,
				(unsigned long)cpu_khz % 1000);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up
	 * all the scale factors for all CPUs, assuming the same
	 * speed as the bootup CPU. (cpufreq notifiers will fix this
	 * up if their speed diverges)
	 */
	for_each_possible_cpu(cpu)
		set_cyc2ns_scale(cpu_khz, cpu);

	use_tsc_delay();

	/* Check and install the TSC clocksource */
	dmi_check_system(bad_tsc_dmi_table);

	unsynchronized_tsc();
	check_geode_tsc_reliable();
	current_tsc_khz = tsc_khz;
	clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
							clocksource_tsc.shift);

	/* lower the rating if we already know its unstable: */
	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	} else
		tsc_enabled = 1;

	clocksource_register(&clocksource_tsc);
}