2005-04-16 15:20:36 -07:00
/*
* processor_idle - idle state submodule to the ACPI processor driver
*
* Copyright ( C ) 2001 , 2002 Andy Grover < andrew . grover @ intel . com >
* Copyright ( C ) 2001 , 2002 Paul Diefenbaugh < paul . s . diefenbaugh @ intel . com >
2006-06-24 19:37:00 -04:00
* Copyright ( C ) 2004 , 2005 Dominik Brodowski < linux @ brodo . de >
2005-04-16 15:20:36 -07:00
* Copyright ( C ) 2004 Anil S Keshavamurthy < anil . s . keshavamurthy @ intel . com >
* - Added processor hotplug support
2005-04-15 15:07:10 -04:00
* Copyright ( C ) 2005 Venkatesh Pallipadi < venkatesh . pallipadi @ intel . com >
* - Added support for C3 on SMP
2005-04-16 15:20:36 -07:00
*
* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or ( at
* your option ) any later version .
*
* This program is distributed in the hope that it will be useful , but
* WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public License along
* with this program ; if not , write to the Free Software Foundation , Inc . ,
* 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA .
*
* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
*/
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/init.h>
# include <linux/cpufreq.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
# include <linux/acpi.h>
# include <linux/dmi.h>
# include <linux/moduleparam.h>
2005-10-30 15:03:48 -08:00
# include <linux/sched.h> /* need_resched() */
2006-09-30 23:27:17 -07:00
# include <linux/latency.h>
2005-04-16 15:20:36 -07:00
# include <asm/io.h>
# include <asm/uaccess.h>
# include <acpi/acpi_bus.h>
# include <acpi/processor.h>
/* Identification constants for the ACPI processor driver: component mask,
   class string and human-readable driver name. */
# define ACPI_PROCESSOR_COMPONENT 0x01000000
# define ACPI_PROCESSOR_CLASS "processor"
# define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
/* This file's _COMPONENT is the processor component defined above. */
# define _COMPONENT ACPI_PROCESSOR_COMPONENT
2005-08-05 00:44:28 -04:00
ACPI_MODULE_NAME ( " acpi_processor " )
2005-04-16 15:20:36 -07:00
/* Name string "power"; presumably the name of the per-processor power
   file -- its users are outside this excerpt, TODO confirm. */
# define ACPI_PROCESSOR_FILE_POWER "power"
/*
 * Convert microseconds to PM timer ticks using integer arithmetic:
 * t * (PM_TIMER_FREQUENCY / 1000) / 1000.
 *
 * The argument is parenthesized so that a compound expression such as
 * US_TO_PM_TIMER_TICKS(a + b) is scaled as a whole instead of only its
 * last operand being multiplied.
 */
#define US_TO_PM_TIMER_TICKS(t)	(((t) * (PM_TIMER_FREQUENCY / 1000)) / 1000)
/* Overhead constants expressed in PM timer ticks (4 ticks is roughly 1us at
   3.579 ticks per us). Their users are outside this excerpt. */
# define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
# define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
2006-04-27 05:25:00 -04:00
/* Idle routine called by acpi_processor_idle() when the processor has no
   current power state; when this pointer is NULL acpi_safe_halt() is used
   instead. Where it gets assigned is outside this excerpt. */
static void ( * pm_idle_save ) ( void ) __read_mostly ;
2005-04-16 15:20:36 -07:00
/* max_cstate is registered here as a uint module parameter (mode 0644); the
   variable itself is not defined in this excerpt. set_max_cstate() may
   overwrite it from a DMI quirk entry. */
module_param ( max_cstate , uint , 0644 ) ;
2006-04-27 05:25:00 -04:00
/* uint module parameter registered with mode 0000; it is not referenced
   anywhere else in this excerpt. */
static unsigned int nocst __read_mostly ;
2005-04-16 15:20:36 -07:00
module_param ( nocst , uint , 0000 ) ;
/*
* bm_history - - bit - mask with a bit per jiffy of bus - master activity
* 1000 HZ : 0xFFFFFFFF : 32 jiffies = 32 ms
* 800 HZ : 0xFFFFFFFF : 32 jiffies = 40 ms
* 100 HZ : 0x0000000F : 4 jiffies = 40 ms
* reduce history for more aggressive entry into C3
*/
2006-04-27 05:25:00 -04:00
/* Default mask: all 32 bits when HZ >= 800, otherwise the low HZ / 25 bits
   (HZ / 25 jiffies is 40 ms). Also registered as a uint module parameter
   with mode 0644. */
static unsigned int bm_history __read_mostly =
2005-08-05 00:44:28 -04:00
( HZ > = 800 ? 0xFFFFFFFF : ( ( 1U < < ( HZ / 25 ) ) - 1 ) ) ;
2005-04-16 15:20:36 -07:00
module_param ( bm_history , uint , 0644 ) ;
/* --------------------------------------------------------------------------
Power Management
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/*
* IBM ThinkPad R40e crashes mysteriously when going into C2 or C3 .
* For now disable this . Probably a bug somewhere else .
*
* To skip this limit , boot / load with a large max_cstate limit .
*/
2005-06-22 18:37:00 -04:00
static int set_max_cstate ( struct dmi_system_id * id )
2005-04-16 15:20:36 -07:00
{
if ( max_cstate > ACPI_PROCESSOR_MAX_POWER )
return 0 ;
2005-08-03 00:22:52 -04:00
printk ( KERN_NOTICE PREFIX " %s detected - limiting to C%ld max_cstate. "
2005-08-05 00:44:28 -04:00
" Override with \" processor.max_cstate=%d \" \n " , id - > ident ,
( long ) id - > driver_data , ACPI_PROCESSOR_MAX_POWER + 1 ) ;
2005-04-16 15:20:36 -07:00
2005-08-03 00:22:52 -04:00
max_cstate = ( long ) id - > driver_data ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2006-02-03 21:51:23 +01:00
/*
 * DMI quirk table for systems that need a C-state limit.
 *
 * Every entry names set_max_cstate, an identification string, a BIOS
 * vendor / BIOS version match pair, and a trailing ( void * ) value; that
 * value is presumably the driver_data which set_max_cstate() casts to long
 * and stores in max_cstate. The table ends with an empty entry. The code
 * that walks this table is outside this excerpt.
 */
/* Actually this shouldn't be __cpuinitdata, would be better to fix the
callers to only run once - AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table [ ] = {
2006-05-29 07:16:00 -04:00
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET70WW " ) } , ( void * ) 1 } ,
2006-01-06 01:31:00 -05:00
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET60WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET43WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET45WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET47WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET50WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET52WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET55WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET56WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET59WW " ) } , ( void * ) 1 } ,
/* NOTE(review): this 1SET60WW entry repeats the second entry of the table;
   one of the two looks redundant -- confirm before removing. */
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET60WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET61WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET62WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET64WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET65WW " ) } , ( void * ) 1 } ,
{ set_max_cstate , " IBM ThinkPad R40e " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " IBM " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " 1SET68WW " ) } , ( void * ) 1 } ,
/* Non-IBM systems, matched on Phoenix BIOS vendor and version strings. */
{ set_max_cstate , " Medion 41700 " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " Phoenix Technologies LTD " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " R01-A1J " ) } , ( void * ) 1 } ,
/* The only entry whose trailing value is 2 rather than 1. */
{ set_max_cstate , " Clevo 5600D " , {
DMI_MATCH ( DMI_BIOS_VENDOR , " Phoenix Technologies LTD " ) ,
DMI_MATCH ( DMI_BIOS_VERSION , " SHE845M0.86C.0013.D.0302131307 " ) } ,
2005-08-05 00:44:28 -04:00
( void * ) 2 } ,
2005-04-16 15:20:36 -07:00
/* Empty terminating entry. */
{ } ,
} ;
2005-08-05 00:44:28 -04:00
/*
 * ticks_elapsed - difference between two timer samples.
 * @t1: earlier sample
 * @t2: later sample
 *
 * When @t2 is below @t1 the counter is taken to have wrapped; the wrap
 * point is 32 bits if acpi_fadt.tmr_val_ext is set and 24 bits otherwise.
 *
 * NOTE(review): the wrapped paths compute (max - t1) + t2 without adding 1
 * for the step from max to 0 -- confirm whether that is intentional.
 */
static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
	/* No wrap between the two samples. */
	if (t1 <= t2)
		return t2 - t1;

	/* Wrapped, 32-bit counter. */
	if (acpi_fadt.tmr_val_ext)
		return (0xFFFFFFFF - t1) + t2;

	/* Wrapped, 24-bit counter: keep the result within 24 bits. */
	return ((0x00FFFFFF - t1) + t2) & 0x00FFFFFF;
}
static void
2005-08-05 00:44:28 -04:00
acpi_processor_power_activate ( struct acpi_processor * pr ,
struct acpi_processor_cx * new )
2005-04-16 15:20:36 -07:00
{
2005-08-05 00:44:28 -04:00
struct acpi_processor_cx * old ;
2005-04-16 15:20:36 -07:00
if ( ! pr | | ! new )
return ;
old = pr - > power . state ;
if ( old )
old - > promotion . count = 0 ;
2005-08-05 00:44:28 -04:00
new - > demotion . count = 0 ;
2005-04-16 15:20:36 -07:00
/* Cleanup from old state. */
if ( old ) {
switch ( old - > type ) {
case ACPI_STATE_C3 :
/* Disable bus master reload */
2005-04-15 15:07:10 -04:00
if ( new - > type ! = ACPI_STATE_C3 & & pr - > flags . bm_check )
2005-08-05 00:44:28 -04:00
acpi_set_register ( ACPI_BITREG_BUS_MASTER_RLD , 0 ,
ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-16 15:20:36 -07:00
break ;
}
}
/* Prepare to use new state. */
switch ( new - > type ) {
case ACPI_STATE_C3 :
/* Enable bus master reload */
2005-04-15 15:07:10 -04:00
if ( old - > type ! = ACPI_STATE_C3 & & pr - > flags . bm_check )
2005-08-05 00:44:28 -04:00
acpi_set_register ( ACPI_BITREG_BUS_MASTER_RLD , 1 ,
ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-16 15:20:36 -07:00
break ;
}
pr - > power . state = new ;
return ;
}
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task gets schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
static void acpi_safe_halt ( void )
{
2006-06-26 13:59:11 +02:00
current_thread_info ( ) - > status & = ~ TS_POLLING ;
[PATCH] sched: fix bad missed wakeups in the i386, x86_64, ia64, ACPI and APM idle code
Fernando Lopez-Lezcano reported frequent scheduling latencies and audio
xruns starting at the 2.6.18-rt kernel, and those problems persisted all
until current -rt kernels. The latencies were serious and unjustified by
system load, often in the milliseconds range.
After a patient and heroic multi-month effort of Fernando, where he
tested dozens of kernels, tried various configs, boot options,
test-patches of mine and provided latency traces of those incidents, the
following 'smoking gun' trace was captured by him:
_------=> CPU#
/ _-----=> irqs-off
| / _----=> need-resched
|| / _---=> hardirq/softirq
||| / _--=> preempt-depth
|||| /
||||| delay
cmd pid ||||| time | caller
\ / ||||| \ | /
IRQ_19-1479 1D..1 0us : __trace_start_sched_wakeup (try_to_wake_up)
IRQ_19-1479 1D..1 0us : __trace_start_sched_wakeup <<...>-5856> (37 0)
IRQ_19-1479 1D..1 0us : __trace_start_sched_wakeup (c01262ba 0 0)
IRQ_19-1479 1D..1 0us : resched_task (try_to_wake_up)
IRQ_19-1479 1D..1 0us : __spin_unlock_irqrestore (try_to_wake_up)
...
<idle>-0 1...1 11us!: default_idle (cpu_idle)
...
<idle>-0 0Dn.1 602us : smp_apic_timer_interrupt (c0103baf 1 0)
...
<...>-5856 0D..2 618us : __switch_to (__schedule)
<...>-5856 0D..2 618us : __schedule <<idle>-0> (20 162)
<...>-5856 0D..2 619us : __spin_unlock_irq (__schedule)
<...>-5856 0...1 619us : trace_stop_sched_switched (__schedule)
<...>-5856 0D..1 619us : trace_stop_sched_switched <<...>-5856> (37 0)
what is visible in this trace is that CPU#1 ran try_to_wake_up() for
PID:5856, it placed PID:5856 on CPU#0's runqueue and ran resched_task()
for CPU#0. But it decided to not send an IPI that no CPU - due to
TS_POLLING. But CPU#0 never woke up after its NEED_RESCHED bit was set,
and only rescheduled to PID:5856 upon the next lapic timer IRQ. The
result was a 600+ usecs latency and a missed wakeup!
the bug turned out to be an idle-wakeup bug introduced into the mainline
kernel this summer via an optimization in the x86_64 tree:
commit 495ab9c045e1b0e5c82951b762257fe1c9d81564
Author: Andi Kleen <ak@suse.de>
Date: Mon Jun 26 13:59:11 2006 +0200
[PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status
During some profiling I noticed that default_idle causes a lot of
memory traffic. I think that is caused by the atomic operations
to clear/set the polling flag in thread_info. There is actually
no reason to make this atomic - only the idle thread does it
to itself, other CPUs only read it. So I moved it into ti->status.
the problem is this type of change:
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
while (!need_resched()) {
local_irq_disable();
this changes clear_thread_flag() to an explicit clearing of TS_POLLING.
clear_thread_flag() is defined as:
clear_bit(flag, &ti->flags);
and clear_bit() is a LOCK-ed atomic instruction on all x86 platforms:
static inline void clear_bit(int nr, volatile unsigned long * addr)
{
__asm__ __volatile__( LOCK_PREFIX
"btrl %1,%0"
hence smp_mb__after_clear_bit() is defined as a simple compile barrier:
#define smp_mb__after_clear_bit() barrier()
but the explicit TS_POLLING clearing introduced by the patch:
+ current_thread_info()->status &= ~TS_POLLING;
is not an atomic op! So the clearing of the TS_POLLING bit is freely
reorderable with the reading of the NEED_RESCHED bit - and both now
reside in different memory addresses.
CPU idle wakeup very much depends on ordered memory ops, the clearing of
the TS_POLLING flag must always be done before we test need_resched()
and hit the idle instruction(s). [Symmetrically, the wakeup code needs
to set NEED_RESCHED before it tests the TS_POLLING flag, so memory
ordering is paramount.]
Fernando's dual-core Athlon64 system has a sufficiently advanced memory
ordering model so that it triggered this scenario very often.
( And it also turned out that the reason why these latencies never
triggered on my testsystems is that i routinely use idle=poll, which
was the only idle variant not affected by this bug. )
The fix is to change the smp_mb__after_clear_bit() to an smp_mb(), to
act as an absolute barrier between the TS_POLLING write and the
NEED_RESCHED read. This affects almost all idling methods (default,
ACPI, APM), on all 3 x86 architectures: i386, x86_64, ia64.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Fernando Lopez-Lezcano <nando@ccrma.Stanford.EDU>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-22 01:11:56 -08:00
/*
* TS_POLLING - cleared state must be visible before we
* test NEED_RESCHED :
*/
smp_mb ( ) ;
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
if ( ! need_resched ( ) )
safe_halt ( ) ;
2006-06-26 13:59:11 +02:00
current_thread_info ( ) - > status | = TS_POLLING ;
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
}
2005-08-05 00:44:28 -04:00
/* Atomic counter; presumably the number of CPUs currently in C3 -- its users
   are outside this excerpt, TODO confirm. */
static atomic_t c3_cpu_count ;
2005-04-16 15:20:36 -07:00
2006-09-25 16:28:13 -07:00
/* Common C-state entry for C2, C3, .. */
static void acpi_cstate_enter ( struct acpi_processor_cx * cstate )
{
if ( cstate - > space_id = = ACPI_CSTATE_FFH ) {
/* Call into architectural FFH based C-state */
acpi_processor_ffh_cstate_enter ( cstate ) ;
} else {
int unused ;
/* IO port based C-state */
inb ( cstate - > address ) ;
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK # signal
gets asserted in time to freeze execution properly . */
unused = inl ( acpi_fadt . xpm_tmr_blk . address ) ;
}
}
2005-08-05 00:44:28 -04:00
static void acpi_processor_idle ( void )
2005-04-16 15:20:36 -07:00
{
2005-08-05 00:44:28 -04:00
struct acpi_processor * pr = NULL ;
2005-04-16 15:20:36 -07:00
struct acpi_processor_cx * cx = NULL ;
struct acpi_processor_cx * next_state = NULL ;
2005-08-05 00:44:28 -04:00
int sleep_ticks = 0 ;
u32 t1 , t2 = 0 ;
2005-04-16 15:20:36 -07:00
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task gets schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
pr = processors [ smp_processor_id ( ) ] ;
2005-04-16 15:20:36 -07:00
if ( ! pr )
return ;
/*
* Interrupts must be disabled during bus mastering calculations and
* for C2 / C3 transitions .
*/
local_irq_disable ( ) ;
/*
* Check whether we truly need to go idle , or should
* reschedule :
*/
if ( unlikely ( need_resched ( ) ) ) {
local_irq_enable ( ) ;
return ;
}
cx = pr - > power . state ;
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task gets schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
if ( ! cx ) {
if ( pm_idle_save )
pm_idle_save ( ) ;
else
acpi_safe_halt ( ) ;
return ;
}
2005-04-16 15:20:36 -07:00
/*
* Check BM Activity
* - - - - - - - - - - - - - - - - -
* Check for bus mastering activity ( if required ) , record , and check
* for demotion .
*/
if ( pr - > flags . bm_check ) {
2005-08-05 00:44:28 -04:00
u32 bm_status = 0 ;
unsigned long diff = jiffies - pr - > power . bm_check_timestamp ;
2005-04-16 15:20:36 -07:00
2006-06-24 19:37:00 -04:00
if ( diff > 31 )
diff = 31 ;
2005-04-16 15:20:36 -07:00
2006-06-24 19:37:00 -04:00
pr - > power . bm_activity < < = diff ;
2005-04-16 15:20:36 -07:00
acpi_get_register ( ACPI_BITREG_BUS_MASTER_STATUS ,
2005-08-05 00:44:28 -04:00
& bm_status , ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-16 15:20:36 -07:00
if ( bm_status ) {
2006-06-24 19:37:00 -04:00
pr - > power . bm_activity | = 0x1 ;
2005-04-16 15:20:36 -07:00
acpi_set_register ( ACPI_BITREG_BUS_MASTER_STATUS ,
2005-08-05 00:44:28 -04:00
1 , ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-16 15:20:36 -07:00
}
/*
* PIIX4 Erratum # 18 : Note that BM_STS doesn ' t always reflect
* the true state of bus mastering activity ; forcing us to
* manually check the BMIDEA bit of each IDE channel .
*/
else if ( errata . piix4 . bmisx ) {
if ( ( inb_p ( errata . piix4 . bmisx + 0x02 ) & 0x01 )
2005-08-05 00:44:28 -04:00
| | ( inb_p ( errata . piix4 . bmisx + 0x0A ) & 0x01 ) )
2006-06-24 19:37:00 -04:00
pr - > power . bm_activity | = 0x1 ;
2005-04-16 15:20:36 -07:00
}
pr - > power . bm_check_timestamp = jiffies ;
/*
2006-06-24 19:37:00 -04:00
* If bus mastering is or was active this jiffy , demote
2005-04-16 15:20:36 -07:00
* to avoid a faulty transition . Note that the processor
* won ' t enter a low - power state during this call ( to this
2006-06-24 19:37:00 -04:00
* function ) but should upon the next .
2005-04-16 15:20:36 -07:00
*
* TBD : A better policy might be to fallback to the demotion
* state ( use it for this quantum only ) instead of
* demoting - - and rely on duration as our sole demotion
* qualification . This may , however , introduce DMA
* issues ( e . g . floppy DMA transfer overrun / underrun ) .
*/
2006-06-24 19:37:00 -04:00
if ( ( pr - > power . bm_activity & 0x1 ) & &
cx - > demotion . threshold . bm ) {
2005-04-16 15:20:36 -07:00
local_irq_enable ( ) ;
next_state = cx - > demotion . state ;
goto end ;
}
}
2005-09-15 12:20:00 -04:00
# ifdef CONFIG_HOTPLUG_CPU
/*
* Check for P_LVL2_UP flag before entering C2 and above on
* an SMP system . We do it here instead of doing it at _CST / P_LVL
* detection phase , to work cleanly with logical CPU hotplug .
*/
if ( ( cx - > type ! = ACPI_STATE_C1 ) & & ( num_online_cpus ( ) > 1 ) & &
2005-12-01 17:00:00 -05:00
! pr - > flags . has_cst & & ! acpi_fadt . plvl2_up )
cx = & pr - > power . states [ ACPI_STATE_C1 ] ;
2005-09-15 12:20:00 -04:00
# endif
2005-12-01 17:00:00 -05:00
2005-04-16 15:20:36 -07:00
/*
* Sleep :
* - - - - - -
* Invoke the current Cx state to put the processor to sleep .
*/
2005-12-02 12:44:19 +11:00
if ( cx - > type = = ACPI_STATE_C2 | | cx - > type = = ACPI_STATE_C3 ) {
2006-06-26 13:59:11 +02:00
current_thread_info ( ) - > status & = ~ TS_POLLING ;
[PATCH] sched: fix bad missed wakeups in the i386, x86_64, ia64, ACPI and APM idle code
Fernando Lopez-Lezcano reported frequent scheduling latencies and audio
xruns starting at the 2.6.18-rt kernel, and those problems persisted all
until current -rt kernels. The latencies were serious and unjustified by
system load, often in the milliseconds range.
After a patient and heroic multi-month effort of Fernando, where he
tested dozens of kernels, tried various configs, boot options,
test-patches of mine and provided latency traces of those incidents, the
following 'smoking gun' trace was captured by him:
_------=> CPU#
/ _-----=> irqs-off
| / _----=> need-resched
|| / _---=> hardirq/softirq
||| / _--=> preempt-depth
|||| /
||||| delay
cmd pid ||||| time | caller
\ / ||||| \ | /
IRQ_19-1479 1D..1 0us : __trace_start_sched_wakeup (try_to_wake_up)
IRQ_19-1479 1D..1 0us : __trace_start_sched_wakeup <<...>-5856> (37 0)
IRQ_19-1479 1D..1 0us : __trace_start_sched_wakeup (c01262ba 0 0)
IRQ_19-1479 1D..1 0us : resched_task (try_to_wake_up)
IRQ_19-1479 1D..1 0us : __spin_unlock_irqrestore (try_to_wake_up)
...
<idle>-0 1...1 11us!: default_idle (cpu_idle)
...
<idle>-0 0Dn.1 602us : smp_apic_timer_interrupt (c0103baf 1 0)
...
<...>-5856 0D..2 618us : __switch_to (__schedule)
<...>-5856 0D..2 618us : __schedule <<idle>-0> (20 162)
<...>-5856 0D..2 619us : __spin_unlock_irq (__schedule)
<...>-5856 0...1 619us : trace_stop_sched_switched (__schedule)
<...>-5856 0D..1 619us : trace_stop_sched_switched <<...>-5856> (37 0)
what is visible in this trace is that CPU#1 ran try_to_wake_up() for
PID:5856, it placed PID:5856 on CPU#0's runqueue and ran resched_task()
for CPU#0. But it decided to not send an IPI that no CPU - due to
TS_POLLING. But CPU#0 never woke up after its NEED_RESCHED bit was set,
and only rescheduled to PID:5856 upon the next lapic timer IRQ. The
result was a 600+ usecs latency and a missed wakeup!
the bug turned out to be an idle-wakeup bug introduced into the mainline
kernel this summer via an optimization in the x86_64 tree:
commit 495ab9c045e1b0e5c82951b762257fe1c9d81564
Author: Andi Kleen <ak@suse.de>
Date: Mon Jun 26 13:59:11 2006 +0200
[PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status
During some profiling I noticed that default_idle causes a lot of
memory traffic. I think that is caused by the atomic operations
to clear/set the polling flag in thread_info. There is actually
no reason to make this atomic - only the idle thread does it
to itself, other CPUs only read it. So I moved it into ti->status.
the problem is this type of change:
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
while (!need_resched()) {
local_irq_disable();
this changes clear_thread_flag() to an explicit clearing of TS_POLLING.
clear_thread_flag() is defined as:
clear_bit(flag, &ti->flags);
and clear_bit() is a LOCK-ed atomic instruction on all x86 platforms:
static inline void clear_bit(int nr, volatile unsigned long * addr)
{
__asm__ __volatile__( LOCK_PREFIX
"btrl %1,%0"
hence smp_mb__after_clear_bit() is defined as a simple compile barrier:
#define smp_mb__after_clear_bit() barrier()
but the explicit TS_POLLING clearing introduced by the patch:
+ current_thread_info()->status &= ~TS_POLLING;
is not an atomic op! So the clearing of the TS_POLLING bit is freely
reorderable with the reading of the NEED_RESCHED bit - and both now
reside in different memory addresses.
CPU idle wakeup very much depends on ordered memory ops, the clearing of
the TS_POLLING flag must always be done before we test need_resched()
and hit the idle instruction(s). [Symmetrically, the wakeup code needs
to set NEED_RESCHED before it tests the TS_POLLING flag, so memory
ordering is paramount.]
Fernando's dual-core Athlon64 system has a sufficiently advanced memory
ordering model so that it triggered this scenario very often.
( And it also turned out that the reason why these latencies never
triggered on my testsystems is that i routinely use idle=poll, which
was the only idle variant not affected by this bug. )
The fix is to change the smp_mb__after_clear_bit() to an smp_mb(), to
act as an absolute barrier between the TS_POLLING write and the
NEED_RESCHED read. This affects almost all idling methods (default,
ACPI, APM), on all 3 x86 architectures: i386, x86_64, ia64.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Fernando Lopez-Lezcano <nando@ccrma.Stanford.EDU>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-22 01:11:56 -08:00
/*
* TS_POLLING - cleared state must be visible before we
* test NEED_RESCHED :
*/
smp_mb ( ) ;
2005-12-02 12:44:19 +11:00
if ( need_resched ( ) ) {
2006-06-26 13:59:11 +02:00
current_thread_info ( ) - > status | = TS_POLLING ;
2005-12-02 23:09:06 -08:00
local_irq_enable ( ) ;
2005-12-02 12:44:19 +11:00
return ;
}
}
2005-04-16 15:20:36 -07:00
switch ( cx - > type ) {
case ACPI_STATE_C1 :
/*
* Invoke C1 .
* Use the appropriate idle routine , the one that would
* be used without acpi C - states .
*/
if ( pm_idle_save )
pm_idle_save ( ) ;
else
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task gets schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
acpi_safe_halt ( ) ;
2005-04-16 15:20:36 -07:00
/*
2005-08-05 00:44:28 -04:00
* TBD : Can ' t get time duration while in C1 , as resumes
2005-04-16 15:20:36 -07:00
* go to an ISR rather than here . Need to instrument
* base interrupt handler .
*/
sleep_ticks = 0xFFFFFFFF ;
break ;
case ACPI_STATE_C2 :
/* Get start time (ticks) */
t1 = inl ( acpi_fadt . xpm_tmr_blk . address ) ;
/* Invoke C2 */
2006-09-25 16:28:13 -07:00
acpi_cstate_enter ( cx ) ;
2005-04-16 15:20:36 -07:00
/* Get end time (ticks) */
t2 = inl ( acpi_fadt . xpm_tmr_blk . address ) ;
2006-06-26 00:25:10 -07:00
# ifdef CONFIG_GENERIC_TIME
/* TSC halts in C2, so notify users */
mark_tsc_unstable ( ) ;
# endif
2005-04-16 15:20:36 -07:00
/* Re-enable interrupts */
local_irq_enable ( ) ;
2006-06-26 13:59:11 +02:00
current_thread_info ( ) - > status | = TS_POLLING ;
2005-04-16 15:20:36 -07:00
/* Compute time (ticks) that we were actually asleep */
2005-08-05 00:44:28 -04:00
sleep_ticks =
ticks_elapsed ( t1 , t2 ) - cx - > latency_ticks - C2_OVERHEAD ;
2005-04-16 15:20:36 -07:00
break ;
case ACPI_STATE_C3 :
2005-08-05 00:44:28 -04:00
2005-04-15 15:07:10 -04:00
if ( pr - > flags . bm_check ) {
if ( atomic_inc_return ( & c3_cpu_count ) = =
2005-08-05 00:44:28 -04:00
num_online_cpus ( ) ) {
2005-04-15 15:07:10 -04:00
/*
* All CPUs are trying to go to C3
* Disable bus master arbitration
*/
acpi_set_register ( ACPI_BITREG_ARB_DISABLE , 1 ,
2005-08-05 00:44:28 -04:00
ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-15 15:07:10 -04:00
}
} else {
/* SMP with no shared cache... Invalidate cache */
ACPI_FLUSH_CPU_CACHE ( ) ;
}
2005-08-05 00:44:28 -04:00
2005-04-16 15:20:36 -07:00
/* Get start time (ticks) */
t1 = inl ( acpi_fadt . xpm_tmr_blk . address ) ;
/* Invoke C3 */
2006-09-25 16:28:13 -07:00
acpi_cstate_enter ( cx ) ;
2005-04-16 15:20:36 -07:00
/* Get end time (ticks) */
t2 = inl ( acpi_fadt . xpm_tmr_blk . address ) ;
2005-04-15 15:07:10 -04:00
if ( pr - > flags . bm_check ) {
/* Enable bus master arbitration */
atomic_dec ( & c3_cpu_count ) ;
2005-08-05 00:44:28 -04:00
acpi_set_register ( ACPI_BITREG_ARB_DISABLE , 0 ,
ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-15 15:07:10 -04:00
}
2006-06-26 00:25:10 -07:00
# ifdef CONFIG_GENERIC_TIME
/* TSC halts in C3, so notify users */
mark_tsc_unstable ( ) ;
# endif
2005-04-16 15:20:36 -07:00
/* Re-enable interrupts */
local_irq_enable ( ) ;
2006-06-26 13:59:11 +02:00
current_thread_info ( ) - > status | = TS_POLLING ;
2005-04-16 15:20:36 -07:00
/* Compute time (ticks) that we were actually asleep */
2005-08-05 00:44:28 -04:00
sleep_ticks =
ticks_elapsed ( t1 , t2 ) - cx - > latency_ticks - C3_OVERHEAD ;
2005-04-16 15:20:36 -07:00
break ;
default :
local_irq_enable ( ) ;
return ;
}
2006-06-24 19:37:00 -04:00
cx - > usage + + ;
if ( ( cx - > type ! = ACPI_STATE_C1 ) & & ( sleep_ticks > 0 ) )
cx - > time + = sleep_ticks ;
2005-04-16 15:20:36 -07:00
next_state = pr - > power . state ;
2005-12-01 17:00:00 -05:00
# ifdef CONFIG_HOTPLUG_CPU
/* Don't do promotion/demotion */
if ( ( cx - > type = = ACPI_STATE_C1 ) & & ( num_online_cpus ( ) > 1 ) & &
! pr - > flags . has_cst & & ! acpi_fadt . plvl2_up ) {
next_state = cx ;
goto end ;
}
# endif
2005-04-16 15:20:36 -07:00
/*
* Promotion ?
* - - - - - - - - - -
* Track the number of longs ( time asleep is greater than threshold )
* and promote when the count threshold is reached . Note that bus
* mastering activity may prevent promotions .
* Do not promote above max_cstate .
*/
if ( cx - > promotion . state & &
( ( cx - > promotion . state - pr - > power . states ) < = max_cstate ) ) {
2006-09-30 23:27:17 -07:00
if ( sleep_ticks > cx - > promotion . threshold . ticks & &
cx - > promotion . state - > latency < = system_latency_constraint ( ) ) {
2005-04-16 15:20:36 -07:00
cx - > promotion . count + + ;
2005-08-05 00:44:28 -04:00
cx - > demotion . count = 0 ;
if ( cx - > promotion . count > =
cx - > promotion . threshold . count ) {
2005-04-16 15:20:36 -07:00
if ( pr - > flags . bm_check ) {
2005-08-05 00:44:28 -04:00
if ( !
( pr - > power . bm_activity & cx - >
promotion . threshold . bm ) ) {
next_state =
cx - > promotion . state ;
2005-04-16 15:20:36 -07:00
goto end ;
}
2005-08-05 00:44:28 -04:00
} else {
2005-04-16 15:20:36 -07:00
next_state = cx - > promotion . state ;
goto end ;
}
}
}
}
/*
* Demotion ?
* - - - - - - - - -
* Track the number of shorts ( time asleep is less than time threshold )
* and demote when the usage threshold is reached .
*/
if ( cx - > demotion . state ) {
if ( sleep_ticks < cx - > demotion . threshold . ticks ) {
cx - > demotion . count + + ;
cx - > promotion . count = 0 ;
if ( cx - > demotion . count > = cx - > demotion . threshold . count ) {
next_state = cx - > demotion . state ;
goto end ;
}
}
}
2005-08-05 00:44:28 -04:00
end :
2005-04-16 15:20:36 -07:00
/*
* Demote if current state exceeds max_cstate
2006-09-30 23:27:17 -07:00
* or if the latency of the current state is unacceptable
2005-04-16 15:20:36 -07:00
*/
2006-09-30 23:27:17 -07:00
if ( ( pr - > power . state - pr - > power . states ) > max_cstate | |
pr - > power . state - > latency > system_latency_constraint ( ) ) {
2005-04-16 15:20:36 -07:00
if ( cx - > demotion . state )
next_state = cx - > demotion . state ;
}
/*
* New Cx State ?
* - - - - - - - - - - - - -
* If we ' re going to start using a new Cx state we must clean up
* from the previous and prepare to use the new .
*/
if ( next_state ! = pr - > power . state )
acpi_processor_power_activate ( pr , next_state ) ;
}
2005-08-05 00:44:28 -04:00
static int acpi_processor_set_power_policy ( struct acpi_processor * pr )
2005-04-16 15:20:36 -07:00
{
unsigned int i ;
unsigned int state_is_set = 0 ;
struct acpi_processor_cx * lower = NULL ;
struct acpi_processor_cx * higher = NULL ;
struct acpi_processor_cx * cx ;
if ( ! pr )
2006-06-27 00:41:40 -04:00
return - EINVAL ;
2005-04-16 15:20:36 -07:00
/*
* This function sets the default Cx state policy ( OS idle handler ) .
* Our scheme is to promote quickly to C2 but more conservatively
* to C3 . We ' re favoring C2 for its characteristics of low latency
* ( quick response ) , good power savings , and ability to allow bus
* mastering activity . Note that the Cx state policy is completely
* customizable and can be altered dynamically .
*/
/* startup state */
2005-08-05 00:44:28 -04:00
for ( i = 1 ; i < ACPI_PROCESSOR_MAX_POWER ; i + + ) {
2005-04-16 15:20:36 -07:00
cx = & pr - > power . states [ i ] ;
if ( ! cx - > valid )
continue ;
if ( ! state_is_set )
pr - > power . state = cx ;
state_is_set + + ;
break ;
2005-08-05 00:44:28 -04:00
}
2005-04-16 15:20:36 -07:00
if ( ! state_is_set )
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-04-16 15:20:36 -07:00
/* demotion */
2005-08-05 00:44:28 -04:00
for ( i = 1 ; i < ACPI_PROCESSOR_MAX_POWER ; i + + ) {
2005-04-16 15:20:36 -07:00
cx = & pr - > power . states [ i ] ;
if ( ! cx - > valid )
continue ;
if ( lower ) {
cx - > demotion . state = lower ;
cx - > demotion . threshold . ticks = cx - > latency_ticks ;
cx - > demotion . threshold . count = 1 ;
if ( cx - > type = = ACPI_STATE_C3 )
cx - > demotion . threshold . bm = bm_history ;
}
lower = cx ;
}
/* promotion */
for ( i = ( ACPI_PROCESSOR_MAX_POWER - 1 ) ; i > 0 ; i - - ) {
cx = & pr - > power . states [ i ] ;
if ( ! cx - > valid )
continue ;
if ( higher ) {
2005-08-05 00:44:28 -04:00
cx - > promotion . state = higher ;
2005-04-16 15:20:36 -07:00
cx - > promotion . threshold . ticks = cx - > latency_ticks ;
if ( cx - > type > = ACPI_STATE_C2 )
cx - > promotion . threshold . count = 4 ;
else
cx - > promotion . threshold . count = 10 ;
if ( higher - > type = = ACPI_STATE_C3 )
cx - > promotion . threshold . bm = bm_history ;
}
higher = cx ;
}
2006-06-27 00:41:40 -04:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
2005-08-05 00:44:28 -04:00
static int acpi_processor_get_power_info_fadt ( struct acpi_processor * pr )
2005-04-16 15:20:36 -07:00
{
if ( ! pr )
2006-06-27 00:41:40 -04:00
return - EINVAL ;
2005-04-16 15:20:36 -07:00
if ( ! pr - > pblk )
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-04-16 15:20:36 -07:00
/* if info is obtained from pblk/fadt, type equals state */
pr - > power . states [ ACPI_STATE_C2 ] . type = ACPI_STATE_C2 ;
pr - > power . states [ ACPI_STATE_C3 ] . type = ACPI_STATE_C3 ;
2005-09-15 12:20:00 -04:00
# ifndef CONFIG_HOTPLUG_CPU
/*
* Check for P_LVL2_UP flag before entering C2 and above on
* an SMP system .
*/
2005-12-01 17:00:00 -05:00
if ( ( num_online_cpus ( ) > 1 ) & & ! acpi_fadt . plvl2_up )
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-09-15 12:20:00 -04:00
# endif
2005-04-16 15:20:36 -07:00
/* determine C2 and C3 address from pblk */
pr - > power . states [ ACPI_STATE_C2 ] . address = pr - > pblk + 4 ;
pr - > power . states [ ACPI_STATE_C3 ] . address = pr - > pblk + 5 ;
/* determine latencies from FADT */
pr - > power . states [ ACPI_STATE_C2 ] . latency = acpi_fadt . plvl2_lat ;
pr - > power . states [ ACPI_STATE_C3 ] . latency = acpi_fadt . plvl3_lat ;
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO ,
" lvl2[0x%08x] lvl3[0x%08x] \n " ,
pr - > power . states [ ACPI_STATE_C2 ] . address ,
pr - > power . states [ ACPI_STATE_C3 ] . address ) ) ;
2006-06-27 00:41:40 -04:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
2006-09-25 16:28:13 -07:00
static int acpi_processor_get_power_info_default ( struct acpi_processor * pr )
2005-03-31 23:23:15 -05:00
{
2006-09-25 16:28:13 -07:00
if ( ! pr - > power . states [ ACPI_STATE_C1 ] . valid ) {
/* set the first C-State to C1 */
/* all processors need to support C1 */
pr - > power . states [ ACPI_STATE_C1 ] . type = ACPI_STATE_C1 ;
pr - > power . states [ ACPI_STATE_C1 ] . valid = 1 ;
}
/* the C0 state only exists as a filler in our array */
2005-03-31 23:23:15 -05:00
pr - > power . states [ ACPI_STATE_C0 ] . valid = 1 ;
2006-06-27 00:41:40 -04:00
return 0 ;
2005-03-31 23:23:15 -05:00
}
2005-08-05 00:44:28 -04:00
static int acpi_processor_get_power_info_cst ( struct acpi_processor * pr )
2005-04-16 15:20:36 -07:00
{
2005-08-05 00:44:28 -04:00
acpi_status status = 0 ;
acpi_integer count ;
2005-08-20 08:02:00 -04:00
int current_count ;
2005-08-05 00:44:28 -04:00
int i ;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER , NULL } ;
union acpi_object * cst ;
2005-04-16 15:20:36 -07:00
if ( nocst )
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-04-16 15:20:36 -07:00
2006-09-25 16:28:13 -07:00
current_count = 0 ;
2005-04-16 15:20:36 -07:00
status = acpi_evaluate_object ( pr - > handle , " _CST " , NULL , & buffer ) ;
if ( ACPI_FAILURE ( status ) ) {
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO , " No _CST, giving up \n " ) ) ;
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-08-05 00:44:28 -04:00
}
2005-04-16 15:20:36 -07:00
2006-10-01 00:28:50 +02:00
cst = buffer . pointer ;
2005-04-16 15:20:36 -07:00
/* There must be at least 2 elements */
if ( ! cst | | ( cst - > type ! = ACPI_TYPE_PACKAGE ) | | cst - > package . count < 2 ) {
2006-06-26 23:41:38 -04:00
printk ( KERN_ERR PREFIX " not enough elements in _CST \n " ) ;
2005-04-16 15:20:36 -07:00
status = - EFAULT ;
goto end ;
}
count = cst - > package . elements [ 0 ] . integer . value ;
/* Validate number of power states. */
if ( count < 1 | | count ! = cst - > package . count - 1 ) {
2006-06-26 23:41:38 -04:00
printk ( KERN_ERR PREFIX " count given by _CST is not valid \n " ) ;
2005-04-16 15:20:36 -07:00
status = - EFAULT ;
goto end ;
}
/* Tell driver that at least _CST is supported. */
pr - > flags . has_cst = 1 ;
for ( i = 1 ; i < = count ; i + + ) {
union acpi_object * element ;
union acpi_object * obj ;
struct acpi_power_register * reg ;
struct acpi_processor_cx cx ;
memset ( & cx , 0 , sizeof ( cx ) ) ;
2006-10-01 00:28:50 +02:00
element = & ( cst - > package . elements [ i ] ) ;
2005-04-16 15:20:36 -07:00
if ( element - > type ! = ACPI_TYPE_PACKAGE )
continue ;
if ( element - > package . count ! = 4 )
continue ;
2006-10-01 00:28:50 +02:00
obj = & ( element - > package . elements [ 0 ] ) ;
2005-04-16 15:20:36 -07:00
if ( obj - > type ! = ACPI_TYPE_BUFFER )
continue ;
2005-08-05 00:44:28 -04:00
reg = ( struct acpi_power_register * ) obj - > buffer . pointer ;
2005-04-16 15:20:36 -07:00
if ( reg - > space_id ! = ACPI_ADR_SPACE_SYSTEM_IO & &
2005-08-05 00:44:28 -04:00
( reg - > space_id ! = ACPI_ADR_SPACE_FIXED_HARDWARE ) )
2005-04-16 15:20:36 -07:00
continue ;
/* There should be an easy way to extract an integer... */
2006-10-01 00:28:50 +02:00
obj = & ( element - > package . elements [ 1 ] ) ;
2005-04-16 15:20:36 -07:00
if ( obj - > type ! = ACPI_TYPE_INTEGER )
continue ;
cx . type = obj - > integer . value ;
2006-09-25 16:28:13 -07:00
/*
* Some buggy BIOSes won ' t list C1 in _CST -
* Let acpi_processor_get_power_info_default ( ) handle them later
*/
if ( i = = 1 & & cx . type ! = ACPI_STATE_C1 )
current_count + + ;
cx . address = reg - > address ;
cx . index = current_count + 1 ;
cx . space_id = ACPI_CSTATE_SYSTEMIO ;
if ( reg - > space_id = = ACPI_ADR_SPACE_FIXED_HARDWARE ) {
if ( acpi_processor_ffh_cstate_probe
( pr - > id , & cx , reg ) = = 0 ) {
cx . space_id = ACPI_CSTATE_FFH ;
} else if ( cx . type ! = ACPI_STATE_C1 ) {
/*
* C1 is a special case where FIXED_HARDWARE
* can be handled in non - MWAIT way as well .
* In that case , save this _CST entry info .
* That is , we retain space_id of SYSTEM_IO for
* halt based C1 .
* Otherwise , ignore this info and continue .
*/
continue ;
}
}
2005-04-16 15:20:36 -07:00
2006-10-01 00:28:50 +02:00
obj = & ( element - > package . elements [ 2 ] ) ;
2005-04-16 15:20:36 -07:00
if ( obj - > type ! = ACPI_TYPE_INTEGER )
continue ;
cx . latency = obj - > integer . value ;
2006-10-01 00:28:50 +02:00
obj = & ( element - > package . elements [ 3 ] ) ;
2005-04-16 15:20:36 -07:00
if ( obj - > type ! = ACPI_TYPE_INTEGER )
continue ;
cx . power = obj - > integer . value ;
2005-08-20 08:02:00 -04:00
current_count + + ;
memcpy ( & ( pr - > power . states [ current_count ] ) , & cx , sizeof ( cx ) ) ;
/*
* We support total ACPI_PROCESSOR_MAX_POWER - 1
* ( From 1 through ACPI_PROCESSOR_MAX_POWER - 1 )
*/
if ( current_count > = ( ACPI_PROCESSOR_MAX_POWER - 1 ) ) {
printk ( KERN_WARNING
" Limiting number of power states to max (%d) \n " ,
ACPI_PROCESSOR_MAX_POWER ) ;
printk ( KERN_WARNING
" Please increase ACPI_PROCESSOR_MAX_POWER if needed. \n " ) ;
break ;
}
2005-04-16 15:20:36 -07:00
}
2005-08-05 00:44:28 -04:00
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO , " Found %d power states \n " ,
2005-08-20 08:02:00 -04:00
current_count ) ) ;
2005-04-16 15:20:36 -07:00
/* Validate number of power states discovered */
2005-08-20 08:02:00 -04:00
if ( current_count < 2 )
2005-09-15 12:19:00 -04:00
status = - EFAULT ;
2005-04-16 15:20:36 -07:00
2005-08-05 00:44:28 -04:00
end :
2006-06-30 03:19:10 -04:00
kfree ( buffer . pointer ) ;
2005-04-16 15:20:36 -07:00
2006-06-27 00:41:40 -04:00
return status ;
2005-04-16 15:20:36 -07:00
}
static void acpi_processor_power_verify_c2 ( struct acpi_processor_cx * cx )
{
if ( ! cx - > address )
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
/*
* C2 latency must be less than or equal to 100
* microseconds .
*/
else if ( cx - > latency > ACPI_PROCESSOR_MAX_C2_LATENCY ) {
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO ,
2005-08-05 00:44:28 -04:00
" latency too large [%d] \n " , cx - > latency ) ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
}
/*
* Otherwise we ' ve met all of our C2 requirements .
* Normalize the C2 latency to expidite policy
*/
cx - > valid = 1 ;
cx - > latency_ticks = US_TO_PM_TIMER_TICKS ( cx - > latency ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
}
2005-08-05 00:44:28 -04:00
static void acpi_processor_power_verify_c3 ( struct acpi_processor * pr ,
struct acpi_processor_cx * cx )
2005-04-16 15:20:36 -07:00
{
2005-04-15 15:07:10 -04:00
static int bm_check_flag ;
2005-04-16 15:20:36 -07:00
if ( ! cx - > address )
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
/*
* C3 latency must be less than or equal to 1000
* microseconds .
*/
else if ( cx - > latency > ACPI_PROCESSOR_MAX_C3_LATENCY ) {
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO ,
2005-08-05 00:44:28 -04:00
" latency too large [%d] \n " , cx - > latency ) ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
}
/*
* PIIX4 Erratum # 18 : We don ' t support C3 when Type - F ( fast )
* DMA transfers are used by any ISA device to avoid livelock .
* Note that we could disable Type - F DMA ( as recommended by
* the erratum ) , but this is known to disrupt certain ISA
* devices thus we take the conservative approach .
*/
else if ( errata . piix4 . fdma ) {
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO ,
2005-08-05 00:44:28 -04:00
" C3 not supported on PIIX4 with Type-F DMA \n " ) ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
}
2005-04-15 15:07:10 -04:00
/* All the logic here assumes flags.bm_check is same across all CPUs */
if ( ! bm_check_flag ) {
/* Determine whether bm_check is needed based on CPU */
acpi_processor_power_init_bm_check ( & ( pr - > flags ) , pr - > id ) ;
bm_check_flag = pr - > flags . bm_check ;
} else {
pr - > flags . bm_check = bm_check_flag ;
}
if ( pr - > flags . bm_check ) {
/* bus mastering control is necessary */
if ( ! pr - > flags . bm_control ) {
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO ,
2005-08-05 00:44:28 -04:00
" C3 support requires bus mastering control \n " ) ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-15 15:07:10 -04:00
}
} else {
/*
* WBINVD should be set in fadt , for C3 state to be
* supported on when bm_check is not required .
*/
if ( acpi_fadt . wb_invd ! = 1 ) {
ACPI_DEBUG_PRINT ( ( ACPI_DB_INFO ,
2005-08-05 00:44:28 -04:00
" Cache invalidation should work properly "
" for C3 to be enabled on SMP systems \n " ) ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-15 15:07:10 -04:00
}
acpi_set_register ( ACPI_BITREG_BUS_MASTER_RLD ,
2005-08-05 00:44:28 -04:00
0 , ACPI_MTX_DO_NOT_LOCK ) ;
2005-04-15 15:07:10 -04:00
}
2005-04-16 15:20:36 -07:00
/*
* Otherwise we ' ve met all of our C3 requirements .
* Normalize the C3 latency to expidite policy . Enable
* checking of bus mastering status ( bm_check ) so we can
* use this in our C3 policy
*/
cx - > valid = 1 ;
cx - > latency_ticks = US_TO_PM_TIMER_TICKS ( cx - > latency ) ;
2006-06-27 00:41:40 -04:00
return ;
2005-04-16 15:20:36 -07:00
}
static int acpi_processor_power_verify ( struct acpi_processor * pr )
{
unsigned int i ;
unsigned int working = 0 ;
2006-01-11 22:44:21 +01:00
2006-03-25 16:31:07 +01:00
# ifdef ARCH_APICTIMER_STOPS_ON_C3
2006-03-26 02:24:07 +01:00
int timer_broadcast = 0 ;
cpumask_t mask = cpumask_of_cpu ( pr - > id ) ;
2006-03-25 16:31:07 +01:00
on_each_cpu ( switch_ipi_to_APIC_timer , & mask , 1 , 1 ) ;
2006-01-11 22:44:21 +01:00
# endif
2005-08-05 00:44:28 -04:00
for ( i = 1 ; i < ACPI_PROCESSOR_MAX_POWER ; i + + ) {
2005-04-16 15:20:36 -07:00
struct acpi_processor_cx * cx = & pr - > power . states [ i ] ;
switch ( cx - > type ) {
case ACPI_STATE_C1 :
cx - > valid = 1 ;
break ;
case ACPI_STATE_C2 :
acpi_processor_power_verify_c2 ( cx ) ;
2006-03-25 16:31:07 +01:00
# ifdef ARCH_APICTIMER_STOPS_ON_C3
/* Some AMD systems fake C3 as C2, but still
have timer troubles */
if ( cx - > valid & &
boot_cpu_data . x86_vendor = = X86_VENDOR_AMD )
timer_broadcast + + ;
# endif
2005-04-16 15:20:36 -07:00
break ;
case ACPI_STATE_C3 :
acpi_processor_power_verify_c3 ( pr , cx ) ;
2006-01-11 22:44:21 +01:00
# ifdef ARCH_APICTIMER_STOPS_ON_C3
2006-03-25 16:31:07 +01:00
if ( cx - > valid )
timer_broadcast + + ;
2006-01-11 22:44:21 +01:00
# endif
2005-04-16 15:20:36 -07:00
break ;
}
if ( cx - > valid )
working + + ;
}
2006-03-25 16:31:07 +01:00
2006-03-26 02:24:07 +01:00
# ifdef ARCH_APICTIMER_STOPS_ON_C3
2006-03-25 16:31:07 +01:00
if ( timer_broadcast )
on_each_cpu ( switch_APIC_timer_to_ipi , & mask , 1 , 1 ) ;
2006-03-26 02:24:07 +01:00
# endif
2005-04-16 15:20:36 -07:00
return ( working ) ;
}
2005-08-05 00:44:28 -04:00
static int acpi_processor_get_power_info ( struct acpi_processor * pr )
2005-04-16 15:20:36 -07:00
{
unsigned int i ;
int result ;
/* NOTE: the idle thread may not be running while calling
* this function */
2006-09-25 16:28:13 -07:00
/* Zero initialize all the C-states info. */
memset ( pr - > power . states , 0 , sizeof ( pr - > power . states ) ) ;
2005-04-16 15:20:36 -07:00
result = acpi_processor_get_power_info_cst ( pr ) ;
2005-09-15 12:19:00 -04:00
if ( result = = - ENODEV )
2006-10-19 23:28:28 -07:00
result = acpi_processor_get_power_info_fadt ( pr ) ;
2005-09-15 12:19:00 -04:00
2006-09-25 16:28:13 -07:00
if ( result )
return result ;
acpi_processor_get_power_info_default ( pr ) ;
2005-08-20 08:02:00 -04:00
pr - > power . count = acpi_processor_power_verify ( pr ) ;
2005-04-16 15:20:36 -07:00
/*
* Set Default Policy
* - - - - - - - - - - - - - - - - - -
* Now that we know which states are supported , set the default
* policy . Note that this policy can be changed dynamically
* ( e . g . encourage deeper sleeps to conserve battery life when
* not on AC ) .
*/
result = acpi_processor_set_power_policy ( pr ) ;
if ( result )
2006-06-27 00:41:40 -04:00
return result ;
2005-04-16 15:20:36 -07:00
/*
* if one state of type C2 or C3 is available , mark this
* CPU as being " idle manageable "
*/
for ( i = 1 ; i < ACPI_PROCESSOR_MAX_POWER ; i + + ) {
2005-03-31 23:23:15 -05:00
if ( pr - > power . states [ i ] . valid ) {
2005-04-16 15:20:36 -07:00
pr - > power . count = i ;
2005-11-18 07:29:51 -08:00
if ( pr - > power . states [ i ] . type > = ACPI_STATE_C2 )
pr - > flags . power = 1 ;
2005-03-31 23:23:15 -05:00
}
2005-04-16 15:20:36 -07:00
}
2006-06-27 00:41:40 -04:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
2005-08-05 00:44:28 -04:00
int acpi_processor_cst_has_changed ( struct acpi_processor * pr )
2005-04-16 15:20:36 -07:00
{
2005-08-05 00:44:28 -04:00
int result = 0 ;
2005-04-16 15:20:36 -07:00
if ( ! pr )
2006-06-27 00:41:40 -04:00
return - EINVAL ;
2005-04-16 15:20:36 -07:00
2005-08-05 00:44:28 -04:00
if ( nocst ) {
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-04-16 15:20:36 -07:00
}
if ( ! pr - > flags . power_setup_done )
2006-06-27 00:41:40 -04:00
return - ENODEV ;
2005-04-16 15:20:36 -07:00
/* Fall back to the default idle loop */
pm_idle = pm_idle_save ;
2005-08-05 00:44:28 -04:00
synchronize_sched ( ) ; /* Relies on interrupts forcing exit from idle. */
2005-04-16 15:20:36 -07:00
pr - > flags . power = 0 ;
result = acpi_processor_get_power_info ( pr ) ;
if ( ( pr - > flags . power = = 1 ) & & ( pr - > flags . power_setup_done ) )
pm_idle = acpi_processor_idle ;
2006-06-27 00:41:40 -04:00
return result ;
2005-04-16 15:20:36 -07:00
}
/* proc interface */
/*
 * seq_file show callback for the per-processor "power" proc entry.
 *
 * Takes the processor from seq->private.  If it is NULL nothing is printed.
 * Otherwise a header is emitted (index of the active state, max_cstate,
 * bus master activity bitmap, current system latency constraint), followed
 * by one line per state from index 1 up to pr->power.count.
 *
 * Always returns 0.
 */
static int acpi_processor_power_seq_show ( struct seq_file * seq , void * offset )
{
2006-10-01 00:28:50 +02:00
struct acpi_processor * pr = seq - > private ;
2005-08-05 00:44:28 -04:00
unsigned int i ;
2005-04-16 15:20:36 -07:00
if ( ! pr )
goto end ;
/*
 * Header.  The active state is printed as its index in power.states[]
 * (pointer difference), or 0 when no state has been selected yet.
 */
seq_printf ( seq , " active state: C%zd \n "
2005-08-05 00:44:28 -04:00
" max_cstate: C%d \n "
2006-09-30 23:27:17 -07:00
" bus master activity: %08x \n "
" maximum allowed latency: %d usec \n " ,
2005-08-05 00:44:28 -04:00
pr - > power . state ? pr - > power . state - pr - > power . states : 0 ,
2006-09-30 23:27:17 -07:00
max_cstate , ( unsigned ) pr - > power . bm_activity ,
system_latency_constraint ( ) ) ;
2005-04-16 15:20:36 -07:00
seq_puts ( seq , " states: \n " ) ;
/* One line per state slot; the currently active slot gets a marker. */
for ( i = 1 ; i < = pr - > power . count ; i + + ) {
seq_printf ( seq , " %cC%d: " ,
2005-08-05 00:44:28 -04:00
( & pr - > power . states [ i ] = =
pr - > power . state ? ' * ' : ' ' ) , i ) ;
2005-04-16 15:20:36 -07:00
/* Slots that failed verification are listed but carry no details. */
if ( ! pr - > power . states [ i ] . valid ) {
seq_puts ( seq , " <not supported> \n " ) ;
continue ;
}
/* State type; anything other than C1/C2/C3 is shown as "--". */
switch ( pr - > power . states [ i ] . type ) {
case ACPI_STATE_C1 :
seq_printf ( seq , " type[C1] " ) ;
break ;
case ACPI_STATE_C2 :
seq_printf ( seq , " type[C2] " ) ;
break ;
case ACPI_STATE_C3 :
seq_printf ( seq , " type[C3] " ) ;
break ;
default :
seq_printf ( seq , " type[--] " ) ;
break ;
}
/* Promotion target, printed as an index into power.states[]. */
if ( pr - > power . states [ i ] . promotion . state )
seq_printf ( seq , " promotion[C%zd] " ,
2005-08-05 00:44:28 -04:00
( pr - > power . states [ i ] . promotion . state -
pr - > power . states ) ) ;
2005-04-16 15:20:36 -07:00
else
seq_puts ( seq , " promotion[--] " ) ;
/* Demotion target, printed the same way. */
if ( pr - > power . states [ i ] . demotion . state )
seq_printf ( seq , " demotion[C%zd] " ,
2005-08-05 00:44:28 -04:00
( pr - > power . states [ i ] . demotion . state -
pr - > power . states ) ) ;
2005-04-16 15:20:36 -07:00
else
seq_puts ( seq , " demotion[--] " ) ;
/* Per-state latency, usage counter and accumulated time. */
2006-06-24 19:37:00 -04:00
seq_printf ( seq , " latency[%03d] usage[%08d] duration[%020llu] \n " ,
2005-08-05 00:44:28 -04:00
pr - > power . states [ i ] . latency ,
2006-06-24 19:37:00 -04:00
pr - > power . states [ i ] . usage ,
pr - > power . states [ i ] . time ) ;
2005-04-16 15:20:36 -07:00
}
2005-08-05 00:44:28 -04:00
end :
2006-06-27 00:41:40 -04:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
static int acpi_processor_power_open_fs ( struct inode * inode , struct file * file )
{
return single_open ( file , acpi_processor_power_seq_show ,
2005-08-05 00:44:28 -04:00
PDE ( inode ) - > data ) ;
2005-04-16 15:20:36 -07:00
}
2006-07-04 13:06:00 -04:00
/*
 * File operations for the read-only "power" proc entry: opening goes through
 * acpi_processor_power_open_fs, everything else uses the stock seq_file /
 * single_open helpers.
 */
static const struct file_operations acpi_processor_power_fops = {
2005-08-05 00:44:28 -04:00
. open = acpi_processor_power_open_fs ,
. read = seq_read ,
. llseek = seq_lseek ,
. release = single_release ,
2005-04-16 15:20:36 -07:00
} ;
2006-10-17 00:09:58 -07:00
# ifdef CONFIG_SMP
2006-09-30 23:27:17 -07:00
/*
 * Cross-CPU callback used by the latency notifier.  Deliberately empty:
 * delivering the call is what wakes the target CPU, so there is nothing
 * left to do here.  @v is unused.
 */
static void smp_callback(void *v)
{
}
/*
 * Latency-requirement notifier.
 *
 * Called when some part of the kernel registers a new latency requirement.
 * Every processor has to leave its current C-state so that a suitable one
 * can be recalculated; a cross-CPU function call to the empty smp_callback()
 * is enough to wake them all up.
 *
 * None of the notifier arguments are used.  Always returns NOTIFY_OK.
 */
static int acpi_processor_latency_notify(struct notifier_block *b,
					 unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 0, 1);

	return NOTIFY_OK;
}
/* Notifier block that routes latency-requirement changes to the handler above. */
static struct notifier_block acpi_processor_latency_notifier = {
. notifier_call = acpi_processor_latency_notify ,
} ;
2006-10-17 00:09:58 -07:00
# endif
2006-09-30 23:27:17 -07:00
2006-10-10 14:20:31 -07:00
/*
 * Set up idle/power-state handling for one processor.
 *
 * On the very first call (tracked by a function-local static flag) this also
 * runs the DMI quirk table, reports a reduced max_cstate and, on SMP builds,
 * registers the latency notifier.  That one-time work happens before @pr is
 * validated.
 *
 * For every call with a valid @pr it then:
 *   - writes acpi_fadt.cst_cnt to the SMI command port when cst_cnt is set
 *     and nocst is not (a failure is only logged),
 *   - collects the power information via acpi_processor_get_power_info(),
 *   - if power management is flagged as supported and the idle override
 *     boot option is not set, prints the valid power states and, for
 *     processor 0, saves the current pm_idle and installs
 *     acpi_processor_idle in its place,
 *   - creates the read-only 'power' proc entry under the device directory.
 *
 * Returns 0 on success, -EINVAL when @pr is NULL, or -EIO when the proc
 * entry cannot be created.
 */
int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
					struct acpi_device *device)
{
	static int one_time_done;
	struct proc_dir_entry *power_entry;
	acpi_status rc;
	unsigned int state;

	if (!one_time_done) {
		dmi_check_system(processor_power_dmi_table);
		if (max_cstate < ACPI_C_STATES_MAX) {
			printk(KERN_NOTICE
			       " ACPI: processor limited to max C-state %d \n ",
			       max_cstate);
		}
		one_time_done = 1;
#ifdef CONFIG_SMP
		register_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	if (!pr)
		return -EINVAL;

	if (acpi_fadt.cst_cnt && !nocst) {
		rc = acpi_os_write_port(acpi_fadt.smi_cmd,
					acpi_fadt.cst_cnt, 8);
		if (ACPI_FAILURE(rc)) {
			ACPI_EXCEPTION((AE_INFO, rc,
					" Notifying BIOS of _CST ability failed "));
		}
	}

	acpi_processor_get_power_info(pr);

	/*
	 * Install the idle handler if processor power management is
	 * supported.  Note that the previously set idle handler stays in
	 * use on platforms that only support C1.
	 */
	if ((pr->flags.power) && (!boot_option_idle_override)) {
		printk(KERN_INFO PREFIX " CPU%d (power states: ", pr->id);
		for (state = 1; state <= pr->power.count; state++) {
			if (!pr->power.states[state].valid)
				continue;
			printk(" C%d[C%d] ", state,
			       pr->power.states[state].type);
		}
		printk(" ) \n ");

		/* Only processor 0 swaps the global idle handler. */
		if (pr->id == 0) {
			pm_idle_save = pm_idle;
			pm_idle = acpi_processor_idle;
		}
	}

	/* 'power' [R] */
	power_entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
					S_IRUGO, acpi_device_dir(device));
	if (!power_entry)
		return -EIO;

	power_entry->proc_fops = &acpi_processor_power_fops;
	power_entry->data = acpi_driver_data(device);
	power_entry->owner = THIS_MODULE;

	pr->flags.power_setup_done = 1;

	return 0;
}
2005-08-05 00:44:28 -04:00
int acpi_processor_power_exit ( struct acpi_processor * pr ,
struct acpi_device * device )
2005-04-16 15:20:36 -07:00
{
pr - > flags . power_setup_done = 0 ;
if ( acpi_device_dir ( device ) )
2005-08-05 00:44:28 -04:00
remove_proc_entry ( ACPI_PROCESSOR_FILE_POWER ,
acpi_device_dir ( device ) ) ;
2005-04-16 15:20:36 -07:00
/* Unregister the idle handler when processor #0 is removed. */
if ( pr - > id = = 0 ) {
pm_idle = pm_idle_save ;
/*
* We are about to unload the current idle thread pm callback
* ( pm_idle ) , Wait for all processors to update cached / local
* copies of pm_idle before proceeding .
*/
cpu_idle_wait ( ) ;
2006-10-17 00:09:58 -07:00
# ifdef CONFIG_SMP
2006-09-30 23:27:17 -07:00
unregister_latency_notifier ( & acpi_processor_latency_notifier ) ;
2006-10-17 00:09:58 -07:00
# endif
2005-04-16 15:20:36 -07:00
}
2006-06-27 00:41:40 -04:00
return 0 ;
2005-04-16 15:20:36 -07:00
}