/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */
#include <stdarg.h>

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/personality.h>
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/dmi.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include <linux/err.h>

#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
	return ((unsigned long *)tsk->thread.sp)[3];
}
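
/*
 * Note: the [3] offset above is not arbitrary; it is tied to the exact
 * stack frame that switch_to() in <asm/system.h> builds when it records
 * thread.sp. If that assembly ever changes, this index has to be kept
 * in sync by hand.
 */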

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	int cpu = smp_processor_id();

	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we won't ever return from this function (so the invalid
	 * canaries already on the stack won't ever trigger).
	 */
	boot_init_stack_canary();

	/*
	 * TS_POLLING tells resched_task() that this CPU's idle loop
	 * polls need_resched() instead of halting, so setting
	 * TIF_NEED_RESCHED is enough to wake it and no resched IPI
	 * is required.
	 */
	current_thread_info()->status |= TS_POLLING;

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			check_pgt_cache();
			rmb();

			if (cpu_is_offline(cpu))
				play_dead();

			local_irq_disable();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
		}
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned long sp;
	unsigned short ss, gs;
	const char *board;

	if (user_mode_vm(regs)) {
		sp = regs->sp;
		ss = regs->ss & 0xffff;
		gs = get_user_gs(regs);
	} else {
		sp = (unsigned long) (&regs->sp);
		savesegment(ss, ss);
		savesegment(gs, gs);
	}

	printk("\n");

	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
			task_pid_nr(current), current->comm,
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version, board);

	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
			(u16)regs->cs, regs->ip, regs->flags,
			smp_processor_id());
	print_symbol("EIP is at %s\n", regs->ip);

	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
		regs->ax, regs->bx, regs->cx, regs->dx);
	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
		regs->si, regs->di, regs->bp, sp);
	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4_safe();
	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
			d0, d1, d2, d3);

	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR6: %08lx DR7: %08lx\n",
			d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	__show_regs(regs, 1);
	show_trace(NULL, regs, &regs->sp, regs->bp);
}

/*
 * This gets run with %bx containing the
 * function to call, and %dx containing
 * the "args".
 */
extern void kernel_thread_helper(void);

/*
 * Create a kernel thread
 */
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	struct pt_regs regs;

	memset(&regs, 0, sizeof(regs));

	regs.bx = (unsigned long) fn;
	regs.dx = (unsigned long) arg;

	regs.ds = __USER_DS;
	regs.es = __USER_DS;
	regs.fs = __KERNEL_PERCPU;
	regs.gs = __KERNEL_STACK_CANARY;
	regs.orig_ax = -1;
	regs.ip = (unsigned long) kernel_thread_helper;
	regs.cs = __KERNEL_CS | get_kernel_rpl();
	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

	/* Ok, create the new process.. */
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);
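
/*
 * Usage sketch (illustrative only -- my_worker and the flag choice are
 * hypothetical, not part of this file):
 *
 *	static int my_worker(void *data)
 *	{
 *		return 0;
 *	}
 *
 *	pid = kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * The child starts in kernel_thread_helper(), which calls fn(arg) and
 * feeds the return value to do_exit().
 */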

void release_thread(struct task_struct *dead_task)
{
	BUG_ON(dead_task->mm);
	release_vm86_irqs(dead_task);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
	unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);

	p->thread.ip = (unsigned long) ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	tsk = current;
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	ds_copy_thread(p, current);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	return err;
}
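
/*
 * Note on CLONE_SETTLS above: on 32-bit x86 the clone() TLS argument
 * arrives in %esi, so copy_thread() picks the user_desc pointer up from
 * childregs->si rather than from a C-level parameter.
 */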

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	set_user_gs(regs, 0);
	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);
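
/*
 * start_thread() is invoked by the binfmt loaders (load_elf_binary() and
 * friends) once the new image is mapped, handing over the program's entry
 * point and initial user stack pointer.
 */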

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process.  Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching.  The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular).  With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster.  However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0.
	 */
	load_sp0(tss, next);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	lazy_save_gs(prev->gs);

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	/*
	 * Now maybe handle debug registers and/or IO bitmaps
	 */
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		lazy_load_gs(next->gs);

	/* On x86 this percpu write is a single segment-prefixed store. */
	percpu_write(current_task, next_p);

	return prev_p;
}

int sys_clone(struct pt_regs *regs)
{
	unsigned long clone_flags;
	unsigned long newsp;
	int __user *parent_tidptr, *child_tidptr;

	clone_flags = regs->bx;
	newsp = regs->cx;
	parent_tidptr = (int __user *)regs->dx;
	child_tidptr = (int __user *)regs->di;
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
}

/*
 * sys_execve() executes a new program.
 */
int sys_execve(struct pt_regs *regs)
{
	int error;
	char *filename;

	filename = getname((char __user *) regs->bx);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename,
			(char __user * __user *) regs->cx,
			(char __user * __user *) regs->dx,
			regs);
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
	putname(filename);
out:
	return error;
}

#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long bp, sp, ip;
	unsigned long stack_page;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack_page = (unsigned long)task_stack_page(p);
	sp = p->thread.sp;
	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
		return 0;
	/* include/asm-i386/system.h:switch_to() pushes bp last. */
	bp = *(unsigned long *) sp;
	do {
		if (bp < stack_page || bp > top_ebp+stack_page)
			return 0;
		ip = *(unsigned long *) (bp+4);
		if (!in_sched_functions(ip))
			return ip;
		bp = *(unsigned long *) bp;
	} while (count++ < 16);
	return 0;
}
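
/*
 * The walk above relies on the standard i386 frame-pointer chain:
 *
 *	[bp + 4]  return address of the current frame (ip)
 *	[bp + 0]  caller's saved bp, the next link in the chain
 *
 * which is why each step loads ip from bp+4 and then follows *bp.
 */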

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
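
/*
 * arch_align_stack() shifts the initial user stack downwards by up to
 * 8 kB (unless personality or the randomize_va_space sysctl disables
 * randomization) and keeps the result 16-byte aligned.
 */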

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
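
/*
 * The heap start is randomized within a 32 MB (0x02000000) window above
 * the current brk; if randomize_range() finds no room it falls back to
 * mm->brk unchanged.
 */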