2007-02-16 01:28:03 -08:00
/*
* linux / kernel / time / tick - sched . c
*
* Copyright ( C ) 2005 - 2006 , Thomas Gleixner < tglx @ linutronix . de >
* Copyright ( C ) 2005 - 2007 , Red Hat , Inc . , Ingo Molnar
* Copyright ( C ) 2006 - 2007 Timesys Corp . , Thomas Gleixner
*
* No idle tick implementation for low and high resolution timers
*
* Started by : Thomas Gleixner and Ingo Molnar
*
* For licencing details see kernel - base / COPYING
*/
# include <linux/cpu.h>
# include <linux/err.h>
# include <linux/hrtimer.h>
# include <linux/interrupt.h>
# include <linux/kernel_stat.h>
# include <linux/percpu.h>
# include <linux/profile.h>
# include <linux/sched.h>
# include <linux/tick.h>
2007-02-24 22:10:13 -08:00
# include <asm/irq_regs.h>
2007-02-16 01:28:03 -08:00
# include "tick-internal.h"
/*
* Per cpu nohz control structure
*
* One struct tick_sched instance exists per CPU . The functions in this
* file reach it through per_cpu ( ) or __get_cpu_var ( ) .
*/
static DEFINE_PER_CPU ( struct tick_sched , tick_cpu_sched ) ;
/*
* The time , when the last jiffy update happened . Protected by xtime_lock .
*
* Writers in this file modify it between write_seqlock ( ) and
* write_sequnlock ( ) on xtime_lock ; the reader in this file samples it
* inside a read_seqbegin ( ) / read_seqretry ( ) loop .
*/
static ktime_t last_jiffies_update ;
[PATCH] Add debugging feature /proc/timer_list
Add /proc/timer_list, which prints all currently pending (high-res) timers
and all clock-event sources with their parameters in a human-readable form.
Sample output:
Timer List Version: v0.1
HRTIMER_MAX_CLOCK_BASES: 2
now at 4246046273872 nsecs
cpu: 0
clock 0:
.index: 0
.resolution: 1 nsecs
.get_time: ktime_get_real
.offset: 1273998312645738432 nsecs
active timers:
clock 1:
.index: 1
.resolution: 1 nsecs
.get_time: ktime_get
.offset: 0 nsecs
active timers:
#0: <f5a90ec8>, hrtimer_sched_tick, hrtimer_stop_sched_tick, swapper/0
# expires at 4246432689566 nsecs [in 386415694 nsecs]
#1: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, pcscd/2050
# expires at 4247018194689 nsecs [in 971920817 nsecs]
#2: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, irqbalance/1909
# expires at 4247351358392 nsecs [in 1305084520 nsecs]
#3: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, crond/2157
# expires at 4249097614968 nsecs [in 3051341096 nsecs]
#4: <f5a90ec8>, it_real_fn, do_setitimer, syslogd/1888
# expires at 4251329900926 nsecs [in 5283627054 nsecs]
.expires_next : 4246432689566 nsecs
.hres_active : 1
.check_clocks : 0
.nr_events : 31306
.idle_tick : 4246020791890 nsecs
.tick_stopped : 1
.idle_jiffies : 986504
.idle_calls : 40700
.idle_sleeps : 36014
.idle_entrytime : 4246019418883 nsecs
.idle_sleeptime : 4178181972709 nsecs
cpu: 1
clock 0:
.index: 0
.resolution: 1 nsecs
.get_time: ktime_get_real
.offset: 1273998312645738432 nsecs
active timers:
clock 1:
.index: 1
.resolution: 1 nsecs
.get_time: ktime_get
.offset: 0 nsecs
active timers:
#0: <f5a90ec8>, hrtimer_sched_tick, hrtimer_restart_sched_tick, swapper/0
# expires at 4246050084568 nsecs [in 3810696 nsecs]
#1: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, atd/2227
# expires at 4261010635003 nsecs [in 14964361131 nsecs]
#2: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, smartd/2332
# expires at 5469485798970 nsecs [in 1223439525098 nsecs]
.expires_next : 4246050084568 nsecs
.hres_active : 1
.check_clocks : 0
.nr_events : 24043
.idle_tick : 4246046084568 nsecs
.tick_stopped : 0
.idle_jiffies : 986510
.idle_calls : 26360
.idle_sleeps : 22551
.idle_entrytime : 4246043874339 nsecs
.idle_sleeptime : 4170763761184 nsecs
tick_broadcast_mask: 00000003
event_broadcast_mask: 00000001
CPU#0's local event device:
Clock Event Device: lapic
capabilities: 0000000e
max_delta_ns: 807385544
min_delta_ns: 1443
mult: 44624025
shift: 32
set_next_event: lapic_next_event
set_mode: lapic_timer_setup
event_handler: hrtimer_interrupt
.installed: 1
.expires: 4246432689566 nsecs
CPU#1's local event device:
Clock Event Device: lapic
capabilities: 0000000e
max_delta_ns: 807385544
min_delta_ns: 1443
mult: 44624025
shift: 32
set_next_event: lapic_next_event
set_mode: lapic_timer_setup
event_handler: hrtimer_interrupt
.installed: 1
.expires: 4246050084568 nsecs
Clock Event Device: hpet
capabilities: 00000007
max_delta_ns: 2147483647
min_delta_ns: 3352
mult: 61496110
shift: 32
set_next_event: hpet_next_event
set_mode: hpet_set_mode
event_handler: handle_nextevt_broadcast
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-02-16 01:28:15 -08:00
/*
 * Look up the nohz control structure that belongs to @cpu.
 *
 * Returns the address of that CPU's tick_cpu_sched instance.
 */
struct tick_sched *tick_get_tick_sched(int cpu)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);

	return ts;
}
2007-02-16 01:28:03 -08:00
/*
 * Account every full tick period that elapsed between last_jiffies_update
 * and @now, and hand the resulting tick count to do_timer().
 *
 * Must be called with interrupts disabled !
 */
static void tick_do_update_jiffies64(ktime_t now)
{
	unsigned long extra_ticks = 0;
	ktime_t elapsed;

	/* Reevaluate with xtime_lock held */
	write_seqlock(&xtime_lock);

	elapsed = ktime_sub(now, last_jiffies_update);
	if (elapsed.tv64 < tick_period.tv64)
		goto unlock;

	/* Consume the first full period */
	elapsed = ktime_sub(elapsed, tick_period);
	last_jiffies_update = ktime_add(last_jiffies_update, tick_period);

	/* Slow path for long timeouts */
	if (unlikely(elapsed.tv64 >= tick_period.tv64)) {
		s64 period_ns = ktime_to_ns(tick_period);

		extra_ticks = ktime_divns(elapsed, period_ns);
		last_jiffies_update = ktime_add_ns(last_jiffies_update,
						   period_ns * extra_ticks);
	}

	/* One tick for the first period plus whatever the slow path found */
	do_timer(extra_ticks + 1);

unlock:
	write_sequnlock(&xtime_lock);
}
/*
 * Initialize the jiffies update time stamp on first use and return it.
 *
 * While last_jiffies_update is still zero it is seeded from
 * tick_next_period. The (possibly freshly seeded) value is returned.
 */
static ktime_t tick_init_jiffy_update(void)
{
	ktime_t base;

	write_seqlock(&xtime_lock);

	/* Did we start the jiffies update yet ? */
	if (!last_jiffies_update.tv64)
		last_jiffies_update = tick_next_period;
	base = last_jiffies_update;

	write_sequnlock(&xtime_lock);

	return base;
}
/*
* NOHZ - aka dynamic tick functionality
*/
# ifdef CONFIG_NO_HZ
/*
* NO HZ enabled ?
*
* Defaults to 1 . setup_tick_nohz ( ) overwrites it from the boot parameter ;
* tick_nohz_switch_to_nohz ( ) and tick_setup_sched_timer ( ) read it before
* selecting a nohz mode .
*/
static int tick_nohz_enabled __read_mostly = 1 ;
/*
 * Boot parameter handler: enable / disable tickless mode.
 *
 * Returns 1 when @str matched one of the two accepted values and
 * tick_nohz_enabled was updated, 0 when @str was not recognised.
 *
 * NOTE(review): the blank padding inside the string literals below is
 * reproduced exactly as found; it looks like an extraction artifact -
 * confirm against the upstream file.
 */
static int __init setup_tick_nohz(char *str)
{
	int enable;

	if (strcmp(str, " off ") == 0)
		enable = 0;
	else if (strcmp(str, " on ") == 0)
		enable = 1;
	else
		return 0;

	tick_nohz_enabled = enable;
	return 1;
}
__setup(" nohz= ", setup_tick_nohz);
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
*
* Called from interrupt entry when the CPU was idle
*
* In case the sched_tick was stopped on this CPU , we have to check if jiffies
* must be updated . Otherwise an interrupt handler could use a stale jiffy
* value . We do this unconditionally on any cpu , as we don ' t know whether the
* cpu , which has the update task assigned is in a long sleep .
*/
void tick_nohz_update_jiffies ( void )
{
int cpu = smp_processor_id ( ) ;
struct tick_sched * ts = & per_cpu ( tick_cpu_sched , cpu ) ;
unsigned long flags ;
ktime_t now ;
if ( ! ts - > tick_stopped )
return ;
cpu_clear ( cpu , nohz_cpu_mask ) ;
now = ktime_get ( ) ;
local_irq_save ( flags ) ;
tick_do_update_jiffies64 ( now ) ;
local_irq_restore ( flags ) ;
}
/**
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future , stop the idle tick
* Called either from the idle loop or from irq_exit ( ) when an idle period was
* just interrupted by an interrupt which did not cause a reschedule .
*/
void tick_nohz_stop_sched_tick ( void )
{
unsigned long seq , last_jiffies , next_jiffies , delta_jiffies , flags ;
struct tick_sched * ts ;
ktime_t last_update , expires , now , delta ;
int cpu ;
local_irq_save ( flags ) ;
cpu = smp_processor_id ( ) ;
ts = & per_cpu ( tick_cpu_sched , cpu ) ;
if ( unlikely ( ts - > nohz_mode = = NOHZ_MODE_INACTIVE ) )
goto end ;
if ( need_resched ( ) )
goto end ;
cpu = smp_processor_id ( ) ;
2007-02-19 18:12:05 +00:00
if ( unlikely ( local_softirq_pending ( ) ) )
printk ( KERN_ERR " NOHZ: local_softirq_pending %02x \n " ,
local_softirq_pending ( ) ) ;
2007-02-16 01:28:03 -08:00
now = ktime_get ( ) ;
/*
* When called from irq_exit we need to account the idle sleep time
* correctly .
*/
if ( ts - > tick_stopped ) {
delta = ktime_sub ( now , ts - > idle_entrytime ) ;
ts - > idle_sleeptime = ktime_add ( ts - > idle_sleeptime , delta ) ;
}
ts - > idle_entrytime = now ;
ts - > idle_calls + + ;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin ( & xtime_lock ) ;
last_update = last_jiffies_update ;
last_jiffies = jiffies ;
} while ( read_seqretry ( & xtime_lock , seq ) ) ;
/* Get the next timer wheel timer */
next_jiffies = get_next_timer_interrupt ( last_jiffies ) ;
delta_jiffies = next_jiffies - last_jiffies ;
2007-02-19 18:11:56 +00:00
if ( rcu_needs_cpu ( cpu ) )
delta_jiffies = 1 ;
2007-02-16 01:28:03 -08:00
/*
* Do not stop the tick , if we are only one off
* or if the cpu is required for rcu
*/
2007-02-19 18:11:56 +00:00
if ( ! ts - > tick_stopped & & delta_jiffies = = 1 )
2007-02-16 01:28:03 -08:00
goto out ;
/* Schedule the tick, if we are at least one jiffie off */
if ( ( long ) delta_jiffies > = 1 ) {
2007-02-19 18:11:56 +00:00
if ( delta_jiffies > 1 )
2007-02-16 01:28:03 -08:00
cpu_set ( cpu , nohz_cpu_mask ) ;
/*
* nohz_stop_sched_tick can be called several times before
* the nohz_restart_sched_tick is called . This happens when
* interrupts arrive which do not cause a reschedule . In the
* first call we save the current tick time , so we can restart
* the scheduler tick in nohz_restart_sched_tick .
*/
if ( ! ts - > tick_stopped ) {
ts - > idle_tick = ts - > sched_timer . expires ;
ts - > tick_stopped = 1 ;
ts - > idle_jiffies = last_jiffies ;
}
/*
* calculate the expiry time for the next timer wheel
* timer
*/
expires = ktime_add_ns ( last_update , tick_period . tv64 *
delta_jiffies ) ;
ts - > idle_expires = expires ;
ts - > idle_sleeps + + ;
if ( ts - > nohz_mode = = NOHZ_MODE_HIGHRES ) {
hrtimer_start ( & ts - > sched_timer , expires ,
HRTIMER_MODE_ABS ) ;
/* Check, if the timer was already in the past */
if ( hrtimer_active ( & ts - > sched_timer ) )
goto out ;
} else if ( ! tick_program_event ( expires , 0 ) )
goto out ;
/*
* We are past the event already . So we crossed a
* jiffie boundary . Update jiffies and raise the
* softirq .
*/
tick_do_update_jiffies64 ( ktime_get ( ) ) ;
cpu_clear ( cpu , nohz_cpu_mask ) ;
}
raise_softirq_irqoff ( TIMER_SOFTIRQ ) ;
out :
ts - > next_jiffies = next_jiffies ;
ts - > last_jiffies = last_jiffies ;
end :
local_irq_restore ( flags ) ;
}
/**
 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
 *
 * Restart the idle tick when the CPU is woken up from idle
 *
 * Returns immediately when the tick was not stopped. Otherwise interrupts
 * are disabled with local_irq_disable ( ) for the remainder of the function
 * and unconditionally re-enabled with local_irq_enable ( ) before returning.
 */
void tick_nohz_restart_sched_tick ( void )
{
int cpu = smp_processor_id ( ) ;
struct tick_sched * ts = & per_cpu ( tick_cpu_sched , cpu ) ;
unsigned long ticks ;
ktime_t now , delta ;
/* Nothing to restore when the tick never got stopped */
if ( ! ts - > tick_stopped )
return ;
/* Update jiffies first */
now = ktime_get ( ) ;
local_irq_disable ( ) ;
tick_do_update_jiffies64 ( now ) ;
cpu_clear ( cpu , nohz_cpu_mask ) ;
/* Account the idle time */
delta = ktime_sub ( now , ts - > idle_entrytime ) ;
ts - > idle_sleeptime = ktime_add ( ts - > idle_sleeptime , delta ) ;
/*
* We stopped the tick in idle . Update process times would miss the
* time we slept as update_process_times does only a 1 tick
* accounting . Enforce that this is accounted to idle !
*/
ticks = jiffies - ts - > idle_jiffies ;
/*
* We might be one off . Do not randomly account a huge number of ticks !
*/
if ( ticks & & ticks < LONG_MAX ) {
/* Bracket the accounting call with HARDIRQ_OFFSET in the preempt count */
add_preempt_count ( HARDIRQ_OFFSET ) ;
account_system_time ( current , HARDIRQ_OFFSET ,
jiffies_to_cputime ( ticks ) ) ;
sub_preempt_count ( HARDIRQ_OFFSET ) ;
}
/*
* Cancel the scheduled timer and restore the tick
*/
ts - > tick_stopped = 0 ;
hrtimer_cancel ( & ts - > sched_timer ) ;
/* Start over from the tick expiry saved in tick_nohz_stop_sched_tick ( ) */
ts - > sched_timer . expires = ts - > idle_tick ;
/*
* Loop until the timer is armed : in highres mode it must still be
* active after hrtimer_start ( ) , otherwise tick_program_event ( ) must
* return 0 .
*/
while ( 1 ) {
/* Forward the time to expire in the future */
hrtimer_forward ( & ts - > sched_timer , now , tick_period ) ;
if ( ts - > nohz_mode = = NOHZ_MODE_HIGHRES ) {
hrtimer_start ( & ts - > sched_timer ,
ts - > sched_timer . expires ,
HRTIMER_MODE_ABS ) ;
/* Check, if the timer was already in the past */
if ( hrtimer_active ( & ts - > sched_timer ) )
break ;
} else {
if ( ! tick_program_event ( ts - > sched_timer . expires , 0 ) )
break ;
}
/* Update jiffies and reread time */
tick_do_update_jiffies64 ( now ) ;
now = ktime_get ( ) ;
}
local_irq_enable ( ) ;
}
/*
 * Advance ts->sched_timer with hrtimer_forward() relative to @now in
 * tick_period steps and program the clock event for the new expiry.
 *
 * Returns whatever tick_program_event() returns; callers in this file
 * treat non-zero as "retry with a fresh time stamp".
 */
static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
{
	ktime_t next;

	hrtimer_forward(&ts->sched_timer, now, tick_period);
	next = ts->sched_timer.expires;

	return tick_program_event(next, 0);
}
/*
* The nohz low res interrupt handler
*/
static void tick_nohz_handler ( struct clock_event_device * dev )
{
struct tick_sched * ts = & __get_cpu_var ( tick_cpu_sched ) ;
struct pt_regs * regs = get_irq_regs ( ) ;
ktime_t now = ktime_get ( ) ;
dev - > next_event . tv64 = KTIME_MAX ;
/* Check, if the jiffies need an update */
tick_do_update_jiffies64 ( now ) ;
/*
* When we are idle and the tick is stopped , we have to touch
* the watchdog as we might not schedule for a really long
* time . This happens on complete idle SMP systems while
* waiting on the login prompt . We also increment the " start
* of idle " jiffy stamp so the idle accounting adjustment we
* do when we go busy again does not account too much ticks .
*/
if ( ts - > tick_stopped ) {
touch_softlockup_watchdog ( ) ;
ts - > idle_jiffies + + ;
}
update_process_times ( user_mode ( regs ) ) ;
profile_tick ( CPU_PROFILING ) ;
/* Do not restart, when we are in the idle loop */
if ( ts - > tick_stopped )
return ;
while ( tick_nohz_reprogram ( ts , now ) ) {
now = ktime_get ( ) ;
tick_do_update_jiffies64 ( now ) ;
}
}
/**
* tick_nohz_switch_to_nohz - switch to nohz mode
*/
static void tick_nohz_switch_to_nohz ( void )
{
struct tick_sched * ts = & __get_cpu_var ( tick_cpu_sched ) ;
ktime_t next ;
if ( ! tick_nohz_enabled )
return ;
local_irq_disable ( ) ;
if ( tick_switch_to_oneshot ( tick_nohz_handler ) ) {
local_irq_enable ( ) ;
return ;
}
ts - > nohz_mode = NOHZ_MODE_LOWRES ;
/*
* Recycle the hrtimer in ts , so we can share the
* hrtimer_forward with the highres code .
*/
hrtimer_init ( & ts - > sched_timer , CLOCK_MONOTONIC , HRTIMER_MODE_ABS ) ;
/* Get the next period */
next = tick_init_jiffy_update ( ) ;
for ( ; ; ) {
ts - > sched_timer . expires = next ;
if ( ! tick_program_event ( next , 0 ) )
break ;
next = ktime_add ( next , tick_period ) ;
}
local_irq_enable ( ) ;
printk ( KERN_INFO " Switched to NOHz mode on CPU #%d \n " ,
smp_processor_id ( ) ) ;
}
# else
/* CONFIG_NO_HZ is not set : switching to nohz mode is an empty stub */
static inline void tick_nohz_switch_to_nohz ( void ) { }
# endif /* NO_HZ */
/*
* High resolution timer specific code
*/
# ifdef CONFIG_HIGH_RES_TIMERS
/*
* We rearm the timer until we get disabled by the idle code
* Called with interrupts disabled and timer - > base - > cpu_base - > lock held .
*/
static enum hrtimer_restart tick_sched_timer ( struct hrtimer * timer )
{
struct tick_sched * ts =
container_of ( timer , struct tick_sched , sched_timer ) ;
struct hrtimer_cpu_base * base = timer - > base - > cpu_base ;
struct pt_regs * regs = get_irq_regs ( ) ;
ktime_t now = ktime_get ( ) ;
/* Check, if the jiffies need an update */
tick_do_update_jiffies64 ( now ) ;
/*
* Do not call , when we are not in irq context and have
* no valid regs pointer
*/
if ( regs ) {
/*
* When we are idle and the tick is stopped , we have to touch
* the watchdog as we might not schedule for a really long
* time . This happens on complete idle SMP systems while
* waiting on the login prompt . We also increment the " start of
* idle " jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too much ticks .
*/
if ( ts - > tick_stopped ) {
touch_softlockup_watchdog ( ) ;
ts - > idle_jiffies + + ;
}
/*
* update_process_times ( ) might take tasklist_lock , hence
* drop the base lock . sched - tick hrtimers are per - CPU and
* never accessible by userspace APIs , so this is safe to do .
*/
spin_unlock ( & base - > lock ) ;
update_process_times ( user_mode ( regs ) ) ;
profile_tick ( CPU_PROFILING ) ;
spin_lock ( & base - > lock ) ;
}
/* Do not restart, when we are in the idle loop */
if ( ts - > tick_stopped )
return HRTIMER_NORESTART ;
hrtimer_forward ( timer , now , tick_period ) ;
return HRTIMER_RESTART ;
}
/**
* tick_setup_sched_timer - setup the tick emulation timer
*/
void tick_setup_sched_timer ( void )
{
struct tick_sched * ts = & __get_cpu_var ( tick_cpu_sched ) ;
ktime_t now = ktime_get ( ) ;
/*
* Emulate tick processing via per - CPU hrtimers :
*/
hrtimer_init ( & ts - > sched_timer , CLOCK_MONOTONIC , HRTIMER_MODE_ABS ) ;
ts - > sched_timer . function = tick_sched_timer ;
ts - > sched_timer . cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ ;
/* Get the next period */
ts - > sched_timer . expires = tick_init_jiffy_update ( ) ;
for ( ; ; ) {
hrtimer_forward ( & ts - > sched_timer , now , tick_period ) ;
hrtimer_start ( & ts - > sched_timer , ts - > sched_timer . expires ,
HRTIMER_MODE_ABS ) ;
/* Check, if the timer was already in the past */
if ( hrtimer_active ( & ts - > sched_timer ) )
break ;
now = ktime_get ( ) ;
}
# ifdef CONFIG_NO_HZ
if ( tick_nohz_enabled )
ts - > nohz_mode = NOHZ_MODE_HIGHRES ;
# endif
}
void tick_cancel_sched_timer ( int cpu )
{
struct tick_sched * ts = & per_cpu ( tick_cpu_sched , cpu ) ;
if ( ts - > sched_timer . base )
hrtimer_cancel ( & ts - > sched_timer ) ;
ts - > tick_stopped = 0 ;
ts - > nohz_mode = NOHZ_MODE_INACTIVE ;
}
# endif /* HIGH_RES_TIMERS */
/**
* Async notification about clocksource changes
*/
void tick_clock_notify ( void )
{
int cpu ;
for_each_possible_cpu ( cpu )
set_bit ( 0 , & per_cpu ( tick_cpu_sched , cpu ) . check_clocks ) ;
}
/*
* Async notification about clock event changes
*/
void tick_oneshot_notify ( void )
{
struct tick_sched * ts = & __get_cpu_var ( tick_cpu_sched ) ;
set_bit ( 0 , & ts - > check_clocks ) ;
}
/**
* Check , if a change happened , which makes oneshot possible .
*
* Called cyclic from the hrtimer softirq ( driven by the timer
* softirq ) allow_nohz signals , that we can switch into low - res nohz
* mode , because high resolution timers are disabled ( either compile
* or runtime ) .
*/
int tick_check_oneshot_change ( int allow_nohz )
{
struct tick_sched * ts = & __get_cpu_var ( tick_cpu_sched ) ;
if ( ! test_and_clear_bit ( 0 , & ts - > check_clocks ) )
return 0 ;
if ( ts - > nohz_mode ! = NOHZ_MODE_INACTIVE )
return 0 ;
if ( ! timekeeping_is_continuous ( ) | | ! tick_is_oneshot_available ( ) )
return 0 ;
if ( ! allow_nohz )
return 1 ;
tick_nohz_switch_to_nohz ( ) ;
return 0 ;
}