diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index f5d3efbfbeda..3c331c464b40 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1573,7 +1573,7 @@ sys_call_table: data8 sys_fchmodat data8 sys_faccessat data8 sys_pselect6 - data8 sys_ppoll + data8 sys_ppoll // 1295 data8 sys_unshare data8 sys_splice data8 sys_set_robust_list @@ -1588,5 +1588,8 @@ sys_call_table: data8 sys_signalfd data8 sys_ni_syscall data8 sys_eventfd + data8 sys_timerfd_create // 1310 + data8 sys_timerfd_settime + data8 sys_timerfd_gettime .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 846e7e036b13..6e17aed53135 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -17,7 +17,7 @@ * Copyright (C) 2000 Intel * Copyright (C) Chuck Fleckenstein * - * Copyright (C) 1999, 2004 Silicon Graphics, Inc. + * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc. * Copyright (C) Vijay Chander * * Copyright (C) 2006 FUJITSU LIMITED @@ -1762,11 +1762,8 @@ format_mca_init_stack(void *mca_data, unsigned long offset, /* Caller prevents this from being called after init */ static void * __init_refok mca_bootmem(void) { - void *p; - - p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + - KERNEL_STACK_SIZE); - return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE); + return __alloc_bootmem(sizeof(struct ia64_mca_cpu), + KERNEL_STACK_SIZE, 0); } /* Do per-CPU MCA-related initialization. */ @@ -1774,33 +1771,33 @@ void __cpuinit ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; + void *data; + long sz = sizeof(struct ia64_mca_cpu); + int cpu = smp_processor_id(); static int first_time = 1; - if (first_time) { - void *mca_data; - int cpu; - - first_time = 0; - mca_data = mca_bootmem(); - for (cpu = 0; cpu < NR_CPUS; cpu++) { - format_mca_init_stack(mca_data, - offsetof(struct ia64_mca_cpu, mca_stack), - "MCA", cpu); - format_mca_init_stack(mca_data, - offsetof(struct ia64_mca_cpu, init_stack), - "INIT", cpu); - __per_cpu_mca[cpu] = __pa(mca_data); - mca_data += sizeof(struct ia64_mca_cpu); - } - } - /* - * The MCA info structure was allocated earlier and its - * physical address saved in __per_cpu_mca[cpu]. Copy that - * address * to ia64_mca_data so we can access it as a per-CPU - * variable. + * Structure will already be allocated if cpu has been online, + * then offlined. */ - __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()]; + if (__per_cpu_mca[cpu]) { + data = __va(__per_cpu_mca[cpu]); + } else { + if (first_time) { + data = mca_bootmem(); + first_time = 0; + } else + data = page_address(alloc_pages_node(numa_node_id(), + GFP_KERNEL, get_order(sz))); + if (!data) + panic("Could not allocate MCA memory for cpu %d\n", + cpu); + } + format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack), + "MCA", cpu); + format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack), + "INIT", cpu); + __get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data); /* * Stash away a copy of the PTE needed to map the per-CPU page. diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 78acd9fe97e9..f6b99719f10f 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -585,21 +585,6 @@ pfm_put_task(struct task_struct *task) if (task != current) put_task_struct(task); } -static inline void -pfm_set_task_notify(struct task_struct *task) -{ - struct thread_info *info; - - info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); - set_bit(TIF_PERFMON_WORK, &info->flags); -} - -static inline void -pfm_clear_task_notify(void) -{ - clear_thread_flag(TIF_PERFMON_WORK); -} - static inline void pfm_reserve_page(unsigned long a) { @@ -3724,7 +3709,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) PFM_SET_WORK_PENDING(task, 1); - pfm_set_task_notify(task); + tsk_set_notify_resume(task); /* * XXX: send reschedule if task runs on another CPU @@ -5082,7 +5067,7 @@ pfm_handle_work(void) PFM_SET_WORK_PENDING(current, 0); - pfm_clear_task_notify(); + tsk_clear_notify_resume(current); regs = task_pt_regs(current); @@ -5450,7 +5435,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * when coming from ctxsw, current still points to the * previous task, therefore we must work with task and not current. */ - pfm_set_task_notify(task); + tsk_set_notify_resume(task); } /* * defer until state is changed (shorten spin window). the context is locked diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 7377d323131d..49937a383b23 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -52,7 +52,6 @@ #include "sigframe.h" void (*ia64_mark_idle)(int); -static DEFINE_PER_CPU(unsigned int, cpu_idle_state); unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); @@ -157,6 +156,17 @@ show_regs (struct pt_regs *regs) show_stack(NULL, NULL); } +void tsk_clear_notify_resume(struct task_struct *tsk) +{ +#ifdef CONFIG_PERFMON + if (tsk->thread.pfm_needs_checking) + return; +#endif + if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE)) + return; + clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME); +} + void do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall) { @@ -175,6 +185,10 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall /* deal with pending signal delivery */ if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK)) ia64_do_signal(scr, in_syscall); + + /* copy user rbs to kernel rbs */ + if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) + ia64_sync_krbs(); } static int pal_halt = 1; @@ -239,33 +253,23 @@ static inline void play_dead(void) } #endif /* CONFIG_HOTPLUG_CPU */ +static void do_nothing(void *unused) +{ +} + +/* + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of + * pm_idle and update to new pm_idle value. Required while changing pm_idle + * handler on SMP systems. + * + * Caller must have changed pm_idle to the new value before the call. Old + * pm_idle value will not be used by any CPU after the return of this function. + */ void cpu_idle_wait(void) { - unsigned int cpu, this_cpu = get_cpu(); - cpumask_t map; - cpumask_t tmp = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); - put_cpu(); - - cpus_clear(map); - for_each_online_cpu(cpu) { - per_cpu(cpu_idle_state, cpu) = 1; - cpu_set(cpu, map); - } - - __get_cpu_var(cpu_idle_state) = 0; - - wmb(); - do { - ssleep(1); - for_each_online_cpu(cpu) { - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) - cpu_clear(cpu, map); - } - cpus_and(map, map, cpu_online_map); - } while (!cpus_empty(map)); - set_cpus_allowed(current, tmp); + smp_mb(); + /* kick all the CPUs so that they exit out of pm_idle */ + smp_call_function(do_nothing, NULL, 0, 1); } EXPORT_SYMBOL_GPL(cpu_idle_wait); @@ -293,9 +297,6 @@ cpu_idle (void) #ifdef CONFIG_SMP min_xtp(); #endif - if (__get_cpu_var(cpu_idle_state)) - __get_cpu_var(cpu_idle_state) = 0; - rmb(); if (mark_idle) (*mark_idle)(1); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 2e96f17b2f3b..331d6768b5d5 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -547,6 +547,129 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, return 0; } +static long +ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) +{ + unsigned long addr, val; + long ret; + + /* now copy word for word from user rbs to kernel rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + if (access_process_vm(child, addr, &val, sizeof(val), 0) + != sizeof(val)) + return -EIO; + + ret = ia64_poke(child, sw, user_rbs_end, addr, val); + if (ret < 0) + return ret; + } + return 0; +} + +typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *, + unsigned long, unsigned long); + +static void do_sync_rbs(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + unsigned long urbs_end; + syncfunc_t fn = arg; + + if (unw_unwind_to_user(info) < 0) + return; + pt = task_pt_regs(info->task); + urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL); + + fn(info->task, info->sw, pt->ar_bspstore, urbs_end); +} + +/* + * when a thread is stopped (ptraced), debugger might change thread's user + * stack (change memory directly), and we must avoid the RSE stored in kernel + * to override user stack (user space's RSE is newer than kernel's in the + * case). To workaround the issue, we copy kernel RSE to user RSE before the + * task is stopped, so user RSE has updated data. we then copy user RSE to + * kernel after the task is resummed from traced stop and kernel will use the + * newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need + * synchronize user RSE to kernel. + */ +void ia64_ptrace_stop(void) +{ + if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE)) + return; + tsk_set_notify_resume(current); + unw_init_running(do_sync_rbs, ia64_sync_user_rbs); +} + +/* + * This is called to read back the register backing store. + */ +void ia64_sync_krbs(void) +{ + clear_tsk_thread_flag(current, TIF_RESTORE_RSE); + tsk_clear_notify_resume(current); + + unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs); +} + +/* + * After PTRACE_ATTACH, a thread's register backing store area in user + * space is assumed to contain correct data whenever the thread is + * stopped. arch_ptrace_stop takes care of this on tracing stops. + * But if the child was already stopped for job control when we attach + * to it, then it might not ever get into ptrace_stop by the time we + * want to examine the user memory containing the RBS. + */ +void +ptrace_attach_sync_user_rbs (struct task_struct *child) +{ + int stopped = 0; + struct unw_frame_info info; + + /* + * If the child is in TASK_STOPPED, we need to change that to + * TASK_TRACED momentarily while we operate on it. This ensures + * that the child won't be woken up and return to user mode while + * we are doing the sync. (It can only be woken up for SIGKILL.) + */ + + read_lock(&tasklist_lock); + if (child->signal) { + spin_lock_irq(&child->sighand->siglock); + if (child->state == TASK_STOPPED && + !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { + tsk_set_notify_resume(child); + + child->state = TASK_TRACED; + stopped = 1; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); + + if (!stopped) + return; + + unw_init_from_blocked_task(&info, child); + do_sync_rbs(&info, ia64_sync_user_rbs); + + /* + * Now move the child back into TASK_STOPPED if it should be in a + * job control stop, so that SIGCONT can be used to wake it up. + */ + read_lock(&tasklist_lock); + if (child->signal) { + spin_lock_irq(&child->sighand->siglock); + if (child->state == TASK_TRACED && + (child->signal->flags & SIGNAL_STOP_STOPPED)) { + child->state = TASK_STOPPED; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); +} + static inline int thread_matches (struct task_struct *thread, unsigned long addr) { @@ -1422,6 +1545,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) struct task_struct *child; struct switch_stack *sw; long ret; + struct unw_frame_info info; lock_kernel(); ret = -EPERM; @@ -1453,6 +1577,8 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); + if (!ret) + arch_ptrace_attach(child); goto out_tsk; } @@ -1481,6 +1607,11 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) /* write the word at location addr */ urbs_end = ia64_get_user_rbs_end(child, pt, NULL); ret = ia64_poke(child, sw, urbs_end, addr, data); + + /* Make sure user RBS has the latest data */ + unw_init_from_blocked_task(&info, child); + do_sync_rbs(&info, ia64_sync_user_rbs); + goto out_tsk; case PTRACE_PEEKUSR: @@ -1634,6 +1765,10 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, && (current->ptrace & PT_PTRACED)) syscall_trace(); + /* copy user rbs to kernel rbs */ + if (test_thread_flag(TIF_RESTORE_RSE)) + ia64_sync_krbs(); + if (unlikely(current->audit_context)) { long syscall; int arch; @@ -1671,4 +1806,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, || test_thread_flag(TIF_SINGLESTEP)) && (current->ptrace & PT_PTRACED)) syscall_trace(); + + /* copy user rbs to kernel rbs */ + if (test_thread_flag(TIF_RESTORE_RSE)) + ia64_sync_krbs(); } diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index f4ef87a36236..0bdce7dde1b0 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -292,6 +292,7 @@ struct switch_stack { unsigned long, long); extern void ia64_flush_fph (struct task_struct *); extern void ia64_sync_fph (struct task_struct *); + extern void ia64_sync_krbs(void); extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *, unsigned long, unsigned long); @@ -303,6 +304,16 @@ struct switch_stack { extern void ia64_increment_ip (struct pt_regs *pt); extern void ia64_decrement_ip (struct pt_regs *pt); + extern void ia64_ptrace_stop(void); + #define arch_ptrace_stop(code, info) \ + ia64_ptrace_stop() + #define arch_ptrace_stop_needed(code, info) \ + (!test_thread_flag(TIF_RESTORE_RSE)) + + extern void ptrace_attach_sync_user_rbs (struct task_struct *); + #define arch_ptrace_attach(child) \ + ptrace_attach_sync_user_rbs(child) + #endif /* !__KERNEL__ */ /* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */ diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index d16031e72efa..93d83cbe0c8c 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -71,6 +71,9 @@ struct thread_info { #define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) #define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) +#define tsk_set_notify_resume(tsk) \ + set_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME) +extern void tsk_clear_notify_resume(struct task_struct *tsk); #endif /* !__ASSEMBLY */ /* @@ -85,28 +88,30 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ -#define TIF_PERFMON_WORK 6 /* work for pfm_handle_work() */ +#define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ #define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ #define TIF_FREEZE 20 /* is freezing for suspend */ +#define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) #define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE) /* "work to do on user-return" bits */ -#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_PERFMON_WORK|_TIF_SYSCALL_AUDIT|\ +#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ _TIF_NEED_RESCHED| _TIF_SYSCALL_TRACE|\ _TIF_RESTORE_SIGMASK) /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 315f8de950a2..e60314716122 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -299,11 +299,14 @@ #define __NR_signalfd 1307 #define __NR_timerfd 1308 #define __NR_eventfd 1309 +#define __NR_timerfd_create 1310 +#define __NR_timerfd_settime 1311 +#define __NR_timerfd_gettime 1312 #ifdef __KERNEL__ -#define NR_syscalls 286 /* length of syscall table */ +#define NR_syscalls 289 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about