2005-04-16 15:20:36 -07:00
/*
 *  linux/arch/alpha/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */
/*
 * This file handles the architecture-dependent parts of process handling.
 */
# include <linux/errno.h>
# include <linux/module.h>
# include <linux/sched.h>
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/smp.h>
# include <linux/smp_lock.h>
# include <linux/stddef.h>
# include <linux/unistd.h>
# include <linux/ptrace.h>
# include <linux/slab.h>
# include <linux/user.h>
# include <linux/a.out.h>
# include <linux/utsname.h>
# include <linux/time.h>
# include <linux/major.h>
# include <linux/stat.h>
2006-07-10 04:44:12 -07:00
# include <linux/vt.h>
2005-04-16 15:20:36 -07:00
# include <linux/mman.h>
# include <linux/elfcore.h>
# include <linux/reboot.h>
# include <linux/tty.h>
# include <linux/console.h>
# include <asm/reg.h>
# include <asm/uaccess.h>
# include <asm/system.h>
# include <asm/io.h>
# include <asm/pgtable.h>
# include <asm/hwrpb.h>
# include <asm/fpu.h>
# include "proto.h"
# include "pci_impl.h"
2006-01-08 01:03:46 -08:00
/*
 * Power-off function, if any.
 *
 * This is a function pointer taking no arguments and returning nothing.
 * It is initialised here to machine_power_off(), which is defined
 * further down in this file.
 */
void ( * pm_power_off ) ( void ) = machine_power_off ;
2005-04-16 15:20:36 -07:00
void
cpu_idle ( void )
{
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
set_thread_flag ( TIF_POLLING_NRFLAG ) ;
2005-04-16 15:20:36 -07:00
while ( 1 ) {
/* FIXME -- EV6 and LCA45 know how to power down
the CPU . */
while ( ! need_resched ( ) )
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-08 21:39:04 -08:00
cpu_relax ( ) ;
2005-04-16 15:20:36 -07:00
schedule ( ) ;
}
}
/*
 * Shutdown request handed to common_shutdown_1(); filled in by
 * common_shutdown().
 */
struct halt_info {
int mode ; /* one of the LINUX_REBOOT_CMD_* values passed to common_shutdown() */
char * restart_cmd ; /* restart command string; NULL for halt and power-off */
} ;
static void
common_shutdown_1 ( void * generic_ptr )
{
struct halt_info * how = ( struct halt_info * ) generic_ptr ;
struct percpu_struct * cpup ;
unsigned long * pflags , flags ;
int cpuid = smp_processor_id ( ) ;
/* No point in taking interrupts anymore. */
local_irq_disable ( ) ;
cpup = ( struct percpu_struct * )
( ( unsigned long ) hwrpb + hwrpb - > processor_offset
+ hwrpb - > processor_size * cpuid ) ;
pflags = & cpup - > flags ;
flags = * pflags ;
/* Clear reason to "default"; clear "bootstrap in progress". */
flags & = ~ 0x00ff0001UL ;
# ifdef CONFIG_SMP
/* Secondaries halt here. */
if ( cpuid ! = boot_cpuid ) {
flags | = 0x00040000UL ; /* "remain halted" */
* pflags = flags ;
2006-06-04 02:51:34 -07:00
cpu_clear ( cpuid , cpu_present_map ) ;
2005-04-16 15:20:36 -07:00
halt ( ) ;
}
# endif
if ( how - > mode = = LINUX_REBOOT_CMD_RESTART ) {
if ( ! how - > restart_cmd ) {
flags | = 0x00020000UL ; /* "cold bootstrap" */
} else {
/* For SRM, we could probably set environment
variables to get this to work . We ' d have to
delay this until after srm_paging_stop unless
we ever got srm_fixup working .
At the moment , SRM will use the last boot device ,
but the file and flags will be the defaults , when
doing a " warm " bootstrap . */
flags | = 0x00030000UL ; /* "warm bootstrap" */
}
} else {
flags | = 0x00040000UL ; /* "remain halted" */
}
* pflags = flags ;
# ifdef CONFIG_SMP
/* Wait for the secondaries to halt. */
2006-06-04 02:51:34 -07:00
cpu_clear ( boot_cpuid , cpu_present_map ) ;
while ( cpus_weight ( cpu_present_map ) )
2005-04-16 15:20:36 -07:00
barrier ( ) ;
# endif
/* If booted from SRM, reset some of the original environment. */
if ( alpha_using_srm ) {
# ifdef CONFIG_DUMMY_CONSOLE
2005-09-22 21:43:57 -07:00
/* If we've gotten here after SysRq-b, leave interrupt
context before taking over the console . */
if ( in_interrupt ( ) )
irq_exit ( ) ;
2005-04-16 15:20:36 -07:00
/* This has the effect of resetting the VGA video origin. */
take_over_console ( & dummy_con , 0 , MAX_NR_CONSOLES - 1 , 1 ) ;
# endif
pci_restore_srm_config ( ) ;
set_hae ( srm_hae ) ;
}
if ( alpha_mv . kill_arch )
alpha_mv . kill_arch ( how - > mode ) ;
if ( ! alpha_using_srm & & how - > mode ! = LINUX_REBOOT_CMD_RESTART ) {
/* Unfortunately, since MILO doesn't currently understand
the hwrpb bits above , we can ' t reliably halt the
processor and keep it halted . So just loop . */
return ;
}
if ( alpha_using_srm )
srm_paging_stop ( ) ;
halt ( ) ;
}
static void
common_shutdown ( int mode , char * restart_cmd )
{
struct halt_info args ;
args . mode = mode ;
args . restart_cmd = restart_cmd ;
on_each_cpu ( common_shutdown_1 , & args , 1 , 0 ) ;
}
/*
 * Restart the machine.  restart_cmd is forwarded to common_shutdown()
 * together with LINUX_REBOOT_CMD_RESTART.
 */
void
machine_restart(char *restart_cmd)
{
	common_shutdown(LINUX_REBOOT_CMD_RESTART, restart_cmd);
}
/*
 * Halt the machine: common_shutdown() in LINUX_REBOOT_CMD_HALT mode,
 * with no restart command.
 */
void
machine_halt(void)
{
	common_shutdown(LINUX_REBOOT_CMD_HALT, NULL);
}
/*
 * Power the machine off: common_shutdown() in
 * LINUX_REBOOT_CMD_POWER_OFF mode, with no restart command.
 */
void
machine_power_off(void)
{
	common_shutdown(LINUX_REBOOT_CMD_POWER_OFF, NULL);
}
/*
 * Print the registers in *regs.  Used by sysrq-p, among others.
 *
 * I don't believe r9-r15 are ever saved in the context it's used, so
 * NULL is passed as the second argument of dik_show_regs().
 */
void
show_regs(struct pt_regs *regs)
{
	dik_show_regs(regs, NULL);
}
/*
* Re - start a thread when doing execve ( )
*/
void
start_thread ( struct pt_regs * regs , unsigned long pc , unsigned long sp )
{
set_fs ( USER_DS ) ;
regs - > pc = pc ;
regs - > ps = 8 ;
wrusp ( sp ) ;
}
/*
 * Free current thread data structures etc.
 */
void
exit_thread(void)
{
	/* Intentionally empty. */
}
void
flush_thread ( void )
{
/* Arrange for each exec'ed process to start off with a clean slate
with respect to the FPU . This is all exceptions disabled . */
current_thread_info ( ) - > ieee_state = 0 ;
wrfpcr ( FPCR_DYN_NORMAL | ieee_swcr_to_fpcr ( 0 ) ) ;
/* Clean slate for TLS. */
current_thread_info ( ) - > pcb . unique = 0 ;
}
/*
 * Hook called with a dead task.  This implementation does nothing
 * with dead_task.
 */
void
release_thread(struct task_struct *dead_task)
{
	/* Intentionally empty. */
}
/*
 * "alpha_clone()".  By the time we get here, the non-volatile
 * registers have also been saved on the stack.  We do some ugly
 * pointer stuff here (see also copy_thread).
 *
 * Notice that "fork()" is implemented in terms of clone, with
 * parameters (SIGCHLD, 0).
 *
 * A zero usp means "use the caller's current user stack pointer".
 * tls_value is not read here; copy_thread() takes the TLS value from
 * regs->r20 instead.  Returns whatever do_fork() returns.
 */
int
alpha_clone(unsigned long clone_flags, unsigned long usp,
	    int __user *parent_tid, int __user *child_tid,
	    unsigned long tls_value, struct pt_regs *regs)
{
	unsigned long child_usp = usp ? usp : rdusp();

	return do_fork(clone_flags, child_usp, regs, 0,
		       parent_tid, child_tid);
}
/*
 * vfork(): do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD on the
 * caller's current user stack pointer, with no parent/child tid
 * pointers.  Returns whatever do_fork() returns.
 */
int
alpha_vfork(struct pt_regs *regs)
{
	const unsigned long vfork_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;

	return do_fork(vfork_flags, rdusp(), regs, 0, NULL, NULL);
}
/*
* Copy an alpha thread . .
*
* Note the " stack_offset " stuff : when returning to kernel mode , we need
* to have some extra stack - space for the kernel stack that still exists
* after the " ret_from_fork " . When returning to user mode , we only want
* the space needed by the syscall stack frame ( ie " struct pt_regs " ) .
* Use the passed " regs " pointer to determine how much space we need
* for a kernel fork ( ) .
*/
int
copy_thread ( int nr , unsigned long clone_flags , unsigned long usp ,
unsigned long unused ,
struct task_struct * p , struct pt_regs * regs )
{
extern void ret_from_fork ( void ) ;
2006-01-12 01:05:36 -08:00
struct thread_info * childti = task_thread_info ( p ) ;
2005-04-16 15:20:36 -07:00
struct pt_regs * childregs ;
struct switch_stack * childstack , * stack ;
unsigned long stack_offset , settls ;
stack_offset = PAGE_SIZE - sizeof ( struct pt_regs ) ;
if ( ! ( regs - > ps & 8 ) )
stack_offset = ( PAGE_SIZE - 1 ) & ( unsigned long ) regs ;
childregs = ( struct pt_regs * )
2006-01-12 01:05:36 -08:00
( stack_offset + PAGE_SIZE + task_stack_page ( p ) ) ;
2005-04-16 15:20:36 -07:00
* childregs = * regs ;
settls = regs - > r20 ;
childregs - > r0 = 0 ;
childregs - > r19 = 0 ;
childregs - > r20 = 1 ; /* OSF/1 has some strange fork() semantics. */
regs - > r20 = 0 ;
stack = ( ( struct switch_stack * ) regs ) - 1 ;
childstack = ( ( struct switch_stack * ) childregs ) - 1 ;
* childstack = * stack ;
childstack - > r26 = ( unsigned long ) ret_from_fork ;
childti - > pcb . usp = usp ;
childti - > pcb . ksp = ( unsigned long ) childstack ;
childti - > pcb . flags = 1 ; /* set FEN, clear everything else */
/* Set a new TLS for the child thread? Peek back into the
syscall arguments that we saved on syscall entry . Oops ,
except we ' d have clobbered it with the parent / child set
of r20 . Read the saved copy . */
/* Note: if CLONE_SETTLS is not set, then we must inherit the
value from the parent , which will have been set by the block
copy in dup_task_struct . This is non - intuitive , but is
required for proper operation in the case of a threaded
application calling fork . */
if ( clone_flags & CLONE_SETTLS )
childti - > pcb . unique = settls ;
return 0 ;
}
/*
* Fill in the user structure for an ECOFF core dump .
*/
void
dump_thread ( struct pt_regs * pt , struct user * dump )
{
/* switch stack follows right below pt_regs: */
struct switch_stack * sw = ( ( struct switch_stack * ) pt ) - 1 ;
dump - > magic = CMAGIC ;
dump - > start_code = current - > mm - > start_code ;
dump - > start_data = current - > mm - > start_data ;
dump - > start_stack = rdusp ( ) & ~ ( PAGE_SIZE - 1 ) ;
dump - > u_tsize = ( ( current - > mm - > end_code - dump - > start_code )
> > PAGE_SHIFT ) ;
dump - > u_dsize = ( ( current - > mm - > brk + PAGE_SIZE - 1 - dump - > start_data )
> > PAGE_SHIFT ) ;
dump - > u_ssize = ( current - > mm - > start_stack - dump - > start_stack
+ PAGE_SIZE - 1 ) > > PAGE_SHIFT ;
/*
* We store the registers in an order / format that is
* compatible with DEC Unix / OSF / 1 as this makes life easier
* for gdb .
*/
dump - > regs [ EF_V0 ] = pt - > r0 ;
dump - > regs [ EF_T0 ] = pt - > r1 ;
dump - > regs [ EF_T1 ] = pt - > r2 ;
dump - > regs [ EF_T2 ] = pt - > r3 ;
dump - > regs [ EF_T3 ] = pt - > r4 ;
dump - > regs [ EF_T4 ] = pt - > r5 ;
dump - > regs [ EF_T5 ] = pt - > r6 ;
dump - > regs [ EF_T6 ] = pt - > r7 ;
dump - > regs [ EF_T7 ] = pt - > r8 ;
dump - > regs [ EF_S0 ] = sw - > r9 ;
dump - > regs [ EF_S1 ] = sw - > r10 ;
dump - > regs [ EF_S2 ] = sw - > r11 ;
dump - > regs [ EF_S3 ] = sw - > r12 ;
dump - > regs [ EF_S4 ] = sw - > r13 ;
dump - > regs [ EF_S5 ] = sw - > r14 ;
dump - > regs [ EF_S6 ] = sw - > r15 ;
dump - > regs [ EF_A3 ] = pt - > r19 ;
dump - > regs [ EF_A4 ] = pt - > r20 ;
dump - > regs [ EF_A5 ] = pt - > r21 ;
dump - > regs [ EF_T8 ] = pt - > r22 ;
dump - > regs [ EF_T9 ] = pt - > r23 ;
dump - > regs [ EF_T10 ] = pt - > r24 ;
dump - > regs [ EF_T11 ] = pt - > r25 ;
dump - > regs [ EF_RA ] = pt - > r26 ;
dump - > regs [ EF_T12 ] = pt - > r27 ;
dump - > regs [ EF_AT ] = pt - > r28 ;
dump - > regs [ EF_SP ] = rdusp ( ) ;
dump - > regs [ EF_PS ] = pt - > ps ;
dump - > regs [ EF_PC ] = pt - > pc ;
dump - > regs [ EF_GP ] = pt - > gp ;
dump - > regs [ EF_A0 ] = pt - > r16 ;
dump - > regs [ EF_A1 ] = pt - > r17 ;
dump - > regs [ EF_A2 ] = pt - > r18 ;
memcpy ( ( char * ) dump - > regs + EF_SIZE , sw - > fp , 32 * 8 ) ;
}
/*
 * Fill in the user structure for an ELF core dump.
 *
 * dest receives 33 entries: r0-r28 in numeric order, then gp, the
 * user stack pointer, pc, and the thread's UNIQUE value.  r9-r15 come
 * from the switch_stack just below pt; the rest come from pt itself.
 * ti supplies the UNIQUE value.
 */
void
dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti)
{
	/* switch stack follows right below pt_regs: */
	struct switch_stack *saved = ((struct switch_stack *) pt) - 1;

	dest[ 0] = pt->r0;
	dest[ 1] = pt->r1;
	dest[ 2] = pt->r2;
	dest[ 3] = pt->r3;
	dest[ 4] = pt->r4;
	dest[ 5] = pt->r5;
	dest[ 6] = pt->r6;
	dest[ 7] = pt->r7;
	dest[ 8] = pt->r8;
	dest[ 9] = saved->r9;
	dest[10] = saved->r10;
	dest[11] = saved->r11;
	dest[12] = saved->r12;
	dest[13] = saved->r13;
	dest[14] = saved->r14;
	dest[15] = saved->r15;
	dest[16] = pt->r16;
	dest[17] = pt->r17;
	dest[18] = pt->r18;
	dest[19] = pt->r19;
	dest[20] = pt->r20;
	dest[21] = pt->r21;
	dest[22] = pt->r22;
	dest[23] = pt->r23;
	dest[24] = pt->r24;
	dest[25] = pt->r25;
	dest[26] = pt->r26;
	dest[27] = pt->r27;
	dest[28] = pt->r28;
	dest[29] = pt->gp;
	dest[30] = rdusp();
	dest[31] = pt->pc;

	/* Once upon a time this was the PS value.  Which is stupid
	   since that is always 8 for usermode.  Usurped for the more
	   useful value of the thread's UNIQUE field.  */
	dest[32] = ti->pcb.unique;
}
/*
 * Dump the general registers of an arbitrary task into dest by handing
 * the task's pt_regs and thread_info to dump_elf_thread().
 * Always returns 1.
 */
int
dump_elf_task(elf_greg_t *dest, struct task_struct *task)
{
	struct pt_regs *regs = task_pt_regs(task);
	struct thread_info *ti = task_thread_info(task);

	dump_elf_thread(dest, regs, ti);
	return 1;
}
/*
 * Copy 32 * 8 bytes of floating-point state for task into dest.  The
 * state is read from the switch_stack that sits just below the task's
 * pt_regs.  Always returns 1.
 */
int
dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task)
{
	struct pt_regs *regs = task_pt_regs(task);
	struct switch_stack *saved = ((struct switch_stack *) regs) - 1;

	memcpy(dest, saved->fp, 32 * 8);
	return 1;
}
/*
 * sys_execve() executes a new program.
 *
 * ufilename is the user-space path; it is copied in with getname() and
 * released with putname() once do_execve() has run.  argv, envp and
 * regs are passed to do_execve() unchanged.
 *
 * Returns the PTR_ERR() of the getname() result if that is an error
 * pointer, otherwise whatever do_execve() returns.
 */
asmlinkage int
do_sys_execve(char __user *ufilename, char __user * __user *argv,
	      char __user * __user *envp, struct pt_regs *regs)
{
	char *path = getname(ufilename);
	int ret;

	if (IS_ERR(path))
		return PTR_ERR(path);

	ret = do_execve(path, argv, envp, regs);
	putname(path);
	return ret;
}
/*
* Return saved PC of a blocked thread . This assumes the frame
* pointer is the 6 th saved long on the kernel stack and that the
* saved return address is the first long in the frame . This all
* holds provided the thread blocked through a call to schedule ( ) ( $ 15
* is the frame pointer in schedule ( ) and $ 15 is saved at offset 48 by
* entry . S : do_switch_stack ) .
*
* Under heavy swap load I ' ve seen this lose in an ugly way . So do
* some extra sanity checking on the ranges we expect these pointers
* to be in so that we can fail gracefully . This is just for ps after
* all . - - r ~
*/
unsigned long
2006-07-03 00:25:41 -07:00
thread_saved_pc ( struct task_struct * t )
2005-04-16 15:20:36 -07:00
{
2006-01-12 01:05:36 -08:00
unsigned long base = ( unsigned long ) task_stack_page ( t ) ;
2006-01-12 01:05:36 -08:00
unsigned long fp , sp = task_thread_info ( t ) - > pcb . ksp ;
2005-04-16 15:20:36 -07:00
if ( sp > base & & sp + 6 * 8 < base + 16 * 1024 ) {
fp = ( ( unsigned long * ) sp ) [ 6 ] ;
if ( fp > sp & & fp < base + 16 * 1024 )
return * ( unsigned long * ) fp ;
}
return 0 ;
}
unsigned long
get_wchan ( struct task_struct * p )
{
unsigned long schedule_frame ;
unsigned long pc ;
if ( ! p | | p = = current | | p - > state = = TASK_RUNNING )
return 0 ;
/*
* This one depends on the frame size of schedule ( ) . Do a
* " disass schedule " in gdb to find the frame size . Also , the
* code assumes that sleep_on ( ) follows immediately after
* interruptible_sleep_on ( ) and that add_timer ( ) follows
* immediately after interruptible_sleep ( ) . Ugly , isn ' t it ?
* Maybe adding a wchan field to task_struct would be better ,
* after all . . .
*/
pc = thread_saved_pc ( p ) ;
if ( in_sched_functions ( pc ) ) {
2006-01-12 01:05:36 -08:00
schedule_frame = ( ( unsigned long * ) task_thread_info ( p ) - > pcb . ksp ) [ 6 ] ;
2005-04-16 15:20:36 -07:00
return ( ( unsigned long * ) schedule_frame ) [ 12 ] ;
}
return pc ;
}