2005-04-17 02:20:36 +04:00
/*
* PowerPC64 Segment Translation Support .
*
* Dave Engebretsen and Mike Corrigan { engebret | mikejc } @ us . ibm . com
* Copyright ( c ) 2001 Dave Engebretsen
*
* Copyright ( C ) 2002 Anton Blanchard < anton @ au . ibm . com > , IBM
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
2010-07-12 08:36:09 +04:00
# include <linux/memblock.h>
2008-02-14 03:56:49 +03:00
2005-04-17 02:20:36 +04:00
# include <asm/pgtable.h>
# include <asm/mmu.h>
# include <asm/mmu_context.h>
# include <asm/paca.h>
# include <asm/cputable.h>
2008-02-14 03:56:49 +03:00
# include <asm/prom.h>
2005-07-27 22:44:19 +04:00
# include <asm/abs_addr.h>
2005-11-10 05:37:51 +03:00
# include <asm/firmware.h>
2007-11-19 09:44:05 +03:00
# include <asm/iseries/hv_call.h>
2005-04-17 02:20:36 +04:00
2005-05-06 03:15:13 +04:00
/*
 * One segment table entry (STE), as read and written by make_ste() and
 * switch_stab() in this file.
 */
struct stab_entry {
/* ESID shifted left by SID_SHIFT, OR-ed with the STE_ESID_* flag bits (V, KP, KS). */
unsigned long esid_data ;
/* VSID shifted left by STE_VSID_SHIFT. */
unsigned long vsid_data ;
} ;
2005-04-17 02:20:36 +04:00
/* Capacity of the per-CPU cache of user segment-table slots. */
# define NR_STAB_CACHE_ENTRIES 8
2008-05-08 08:27:07 +04:00
/*
 * Number of valid entries in stab_cache. __ste_allocate() sets it to
 * NR_STAB_CACHE_ENTRIES + 1 once the cache is full, which switch_stab()
 * treats as "cache overflowed, scan the whole table instead".
 */
static DEFINE_PER_CPU ( long , stab_cache_ptr ) ;
2009-06-24 10:13:45 +04:00
/*
 * Slot indexes (the values returned by make_ste()) of the user entries
 * installed since the last switch_stab(), so that only those need to be
 * invalidated on the next context switch.
 */
static DEFINE_PER_CPU ( long [ NR_STAB_CACHE_ENTRIES ] , stab_cache ) ;
2005-04-17 02:20:36 +04:00
/*
 * Create a segment table entry for the given esid/vsid pair.
 *
 * stab is the base address of the segment table to update; esid and vsid
 * describe the segment being mapped.
 *
 * The table is organised in groups of 8 entries. The primary group is
 * selected by the low 5 bits of esid, the secondary group by the low 5 bits
 * of ~esid. A free (not valid) entry in either group is used if there is
 * one; otherwise an existing entry is cast out, chosen round robin via the
 * paca's stab_rr counter.
 *
 * Returns the slot index of the entry that was written:
 * (group number << 3) | (entry within group).
 */
static int make_ste ( unsigned long stab , unsigned long esid , unsigned long vsid )
{
unsigned long esid_data , vsid_data ;
unsigned long entry , group , old_esid , castout_entry , i ;
unsigned int global_entry ;
struct stab_entry * ste , * castout_ste ;
2005-12-05 19:24:33 +03:00
/* Non-zero when the segment's effective address is at or above PAGE_OFFSET. */
unsigned long kernel_segment = ( esid < < SID_SHIFT ) > = PAGE_OFFSET ;
2005-04-17 02:20:36 +04:00
/* Build the two words of the new entry up front. */
vsid_data = vsid < < STE_VSID_SHIFT ;
esid_data = esid < < SID_SHIFT | STE_ESID_KP | STE_ESID_V ;
/* Only user segments additionally get the KS bit. */
if ( ! kernel_segment )
esid_data | = STE_ESID_KS ;
/* Search the primary group first. */
/* global_entry is the slot index of the group's first entry (group * 8). */
global_entry = ( esid & 0x1f ) < < 3 ;
/* Group number << 7 is the group's byte offset into the table. */
ste = ( struct stab_entry * ) ( stab | ( ( esid & 0x1f ) < < 7 ) ) ;
/* Find an empty entry, if one exists. */
for ( group = 0 ; group < 2 ; group + + ) {
for ( entry = 0 ; entry < 8 ; entry + + , ste + + ) {
if ( ! ( ste - > esid_data & STE_ESID_V ) ) {
/* Fill in the VSID word before the entry is marked valid. */
ste - > vsid_data = vsid_data ;
2007-07-10 08:49:09 +04:00
/* Barrier between the two stores so the valid bit is written last. */
eieio ( ) ;
2005-04-17 02:20:36 +04:00
ste - > esid_data = esid_data ;
return ( global_entry | entry ) ;
}
}
/* Now search the secondary group. */
global_entry = ( ( ~ esid ) & 0x1f ) < < 3 ;
ste = ( struct stab_entry * ) ( stab | ( ( ( ~ esid ) & 0x1f ) < < 7 ) ) ;
}
/*
 * Could not find an empty entry, so pick one with a round robin selection.
 * Search all entries in the two groups: candidates 0-7 are the primary
 * group, candidates 8-15 the secondary group.
 */
castout_entry = get_paca ( ) - > stab_rr ;
for ( i = 0 ; i < 16 ; i + + ) {
if ( castout_entry < 8 ) {
global_entry = ( esid & 0x1f ) < < 3 ;
ste = ( struct stab_entry * ) ( stab | ( ( esid & 0x1f ) < < 7 ) ) ;
castout_ste = ste + castout_entry ;
} else {
global_entry = ( ( ~ esid ) & 0x1f ) < < 3 ;
ste = ( struct stab_entry * ) ( stab | ( ( ( ~ esid ) & 0x1f ) < < 7 ) ) ;
castout_ste = ste + ( castout_entry - 8 ) ;
}
/* Don't cast out the first kernel segment */
2005-12-05 19:24:33 +03:00
if ( ( castout_ste - > esid_data & ESID_MASK ) ! = PAGE_OFFSET )
2005-04-17 02:20:36 +04:00
break ;
/* That candidate maps PAGE_OFFSET; move on to the next one, wrapping at 16. */
castout_entry = ( castout_entry + 1 ) & 0xf ;
}
/* Next cast-out starts from the candidate after the one chosen here. */
get_paca ( ) - > stab_rr = ( castout_entry + 1 ) & 0xf ;
/* Modify the old entry to the new value. */
/* Force previous translations to complete. DRENG */
asm volatile ( " isync " : : : " memory " ) ;
/* Remember which segment is being evicted so it can be passed to slbie below. */
old_esid = castout_ste - > esid_data > > SID_SHIFT ;
castout_ste - > esid_data = 0 ; /* Invalidate old entry */
asm volatile ( " sync " : : : " memory " ) ; /* Order update */
castout_ste - > vsid_data = vsid_data ;
2007-07-10 08:49:09 +04:00
eieio ( ) ; /* Order update */
2005-04-17 02:20:36 +04:00
castout_ste - > esid_data = esid_data ;
/* slbie is issued on the effective address of the evicted segment. */
asm volatile ( " slbie %0 " : : " r " ( old_esid < < SID_SHIFT ) ) ;
/* Ensure completion of slbie */
asm volatile ( " sync " : : : " memory " ) ;
/* castout_entry & 0x7 is the entry's position inside its group. */
return ( global_entry | ( castout_entry & 0x7 ) ) ;
}
/*
* Allocate a segment table entry for the given ea and mm
*/
static int __ste_allocate ( unsigned long ea , struct mm_struct * mm )
{
unsigned long vsid ;
unsigned char stab_entry ;
unsigned long offset ;
/* Kernel or user address? */
2005-12-04 10:39:15 +03:00
if ( is_kernel_addr ( ea ) ) {
2007-10-11 14:37:10 +04:00
vsid = get_kernel_vsid ( ea , MMU_SEGSIZE_256M ) ;
2005-04-17 02:20:36 +04:00
} else {
if ( ( ea > = TASK_SIZE_USER64 ) | | ( ! mm ) )
return 1 ;
2007-10-11 14:37:10 +04:00
vsid = get_vsid ( mm - > context . id , ea , MMU_SEGSIZE_256M ) ;
2005-04-17 02:20:36 +04:00
}
stab_entry = make_ste ( get_paca ( ) - > stab_addr , GET_ESID ( ea ) , vsid ) ;
2005-12-04 10:39:15 +03:00
if ( ! is_kernel_addr ( ea ) ) {
2005-04-17 02:20:36 +04:00
offset = __get_cpu_var ( stab_cache_ptr ) ;
if ( offset < NR_STAB_CACHE_ENTRIES )
__get_cpu_var ( stab_cache [ offset + + ] ) = stab_entry ;
else
offset = NR_STAB_CACHE_ENTRIES + 1 ;
__get_cpu_var ( stab_cache_ptr ) = offset ;
/* Order update */
asm volatile ( " sync " : : : " memory " ) ;
}
return 0 ;
}
/*
 * Install a segment table entry for @ea on behalf of the current task.
 * Returns whatever __ste_allocate() returns: 0 on success, 1 on failure.
 */
int ste_allocate(unsigned long ea)
{
	struct mm_struct *mm = current->mm;

	return __ste_allocate(ea, mm);
}
/*
* Do the segment table work for a context switch : flush all user
* entries from the table , then preload some probably useful entries
* for the new task
*/
void switch_stab ( struct task_struct * tsk , struct mm_struct * mm )
{
struct stab_entry * stab = ( struct stab_entry * ) get_paca ( ) - > stab_addr ;
struct stab_entry * ste ;
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 09:17:54 +04:00
unsigned long offset ;
2005-04-17 02:20:36 +04:00
unsigned long pc = KSTK_EIP ( tsk ) ;
unsigned long stack = KSTK_ESP ( tsk ) ;
unsigned long unmapped_base ;
/* Force previous translations to complete. DRENG */
asm volatile ( " isync " : : : " memory " ) ;
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 09:17:54 +04:00
/*
* We need interrupts hard - disabled here , not just soft - disabled ,
* so that a PMU interrupt can ' t occur , which might try to access
* user memory ( to get a stack trace ) and possible cause an STAB miss
* which would update the stab_cache / stab_cache_ptr per - cpu variables .
*/
hard_irq_disable ( ) ;
offset = __get_cpu_var ( stab_cache_ptr ) ;
2005-04-17 02:20:36 +04:00
if ( offset < = NR_STAB_CACHE_ENTRIES ) {
int i ;
for ( i = 0 ; i < offset ; i + + ) {
ste = stab + __get_cpu_var ( stab_cache [ i ] ) ;
ste - > esid_data = 0 ; /* invalidate entry */
}
} else {
unsigned long entry ;
/* Invalidate all entries. */
ste = stab ;
/* Never flush the first entry. */
ste + = 1 ;
for ( entry = 1 ;
2005-11-07 03:06:55 +03:00
entry < ( HW_PAGE_SIZE / sizeof ( struct stab_entry ) ) ;
2005-04-17 02:20:36 +04:00
entry + + , ste + + ) {
unsigned long ea ;
ea = ste - > esid_data & ESID_MASK ;
2005-12-04 10:39:15 +03:00
if ( ! is_kernel_addr ( ea ) ) {
2005-04-17 02:20:36 +04:00
ste - > esid_data = 0 ;
}
}
}
asm volatile ( " sync; slbia; sync " : : : " memory " ) ;
__get_cpu_var ( stab_cache_ptr ) = 0 ;
/* Now preload some entries for the new task */
if ( test_tsk_thread_flag ( tsk , TIF_32BIT ) )
unmapped_base = TASK_UNMAPPED_BASE_USER32 ;
else
unmapped_base = TASK_UNMAPPED_BASE_USER64 ;
__ste_allocate ( pc , mm ) ;
if ( GET_ESID ( pc ) = = GET_ESID ( stack ) )
return ;
__ste_allocate ( stack , mm ) ;
if ( ( GET_ESID ( pc ) = = GET_ESID ( unmapped_base ) )
| | ( GET_ESID ( stack ) = = GET_ESID ( unmapped_base ) ) )
return ;
__ste_allocate ( unmapped_base , mm ) ;
/* Order update */
asm volatile ( " sync " : : : " memory " ) ;
}
2005-07-27 22:44:19 +04:00
/*
 * Allocate segment tables for secondary CPUs. These must all go in
 * the first (bolted) segment, so that do_stab_bolted won't get a
 * recursive segment miss on the segment table itself.
 *
 * Does nothing when the MMU has the MMU_FTR_SLB feature. Otherwise every
 * possible CPU except CPU 0 gets a zeroed table of HW_PAGE_SIZE bytes, and
 * the table's virtual and absolute addresses are stored in that CPU's paca.
 */
2007-05-07 09:58:28 +04:00
void __init stabs_alloc ( void )
2005-07-27 22:44:19 +04:00
{
int cpu ;
2011-04-06 23:48:50 +04:00
/* Segment tables are only needed when the MMU has no SLB. */
if ( mmu_has_feature ( MMU_FTR_SLB ) )
2005-07-27 22:44:19 +04:00
return ;
2006-03-29 02:50:51 +04:00
for_each_possible_cpu ( cpu ) {
2005-07-27 22:44:19 +04:00
unsigned long newstab ;
if ( cpu = = 0 )
continue ; /* stab for CPU 0 is statically allocated */
2010-07-12 08:36:09 +04:00
/* One HW page, HW-page aligned, limited to addresses below 1 << SID_SHIFT. */
newstab = memblock_alloc_base ( HW_PAGE_SIZE , HW_PAGE_SIZE ,
2005-11-07 03:06:55 +03:00
1 < < SID_SHIFT ) ;
2005-12-05 19:24:33 +03:00
/* From here on newstab holds the kernel virtual address of the allocation. */
newstab = ( unsigned long ) __va ( newstab ) ;
2005-07-27 22:44:19 +04:00
2005-11-07 03:06:55 +03:00
/* Start with every entry invalid. */
memset ( ( void * ) newstab , 0 , HW_PAGE_SIZE ) ;
2005-07-27 22:44:19 +04:00
paca [ cpu ] . stab_addr = newstab ;
paca [ cpu ] . stab_real = virt_to_abs ( newstab ) ;
2009-01-06 17:26:03 +03:00
printk ( KERN_INFO " Segment table for CPU %d at 0x%llx "
" virtual, 0x%llx absolute \n " ,
2005-11-07 03:06:55 +03:00
cpu , paca [ cpu ] . stab_addr , paca [ cpu ] . stab_real ) ;
2005-07-27 22:44:19 +04:00
}
}
2005-04-17 02:20:36 +04:00
/*
* Build an entry for the base kernel segment and put it into
* the segment table or SLB . All other segment table or SLB
* entries are faulted in .
*/
void stab_initialize ( unsigned long stab )
{
2007-10-11 14:37:10 +04:00
unsigned long vsid = get_kernel_vsid ( PAGE_OFFSET , MMU_SEGSIZE_256M ) ;
2005-11-10 05:37:51 +03:00
unsigned long stabreal ;
2005-04-17 02:20:36 +04:00
2005-11-07 03:06:55 +03:00
asm volatile ( " isync; slbia; isync " : : : " memory " ) ;
2005-12-05 19:24:33 +03:00
make_ste ( stab , GET_ESID ( PAGE_OFFSET ) , vsid ) ;
2005-04-17 02:20:36 +04:00
2005-11-07 03:06:55 +03:00
/* Order update */
asm volatile ( " sync " : : : " memory " ) ;
2005-11-10 05:37:51 +03:00
/* Set ASR */
stabreal = get_paca ( ) - > stab_real | 0x1ul ;
# ifdef CONFIG_PPC_ISERIES
if ( firmware_has_feature ( FW_FEATURE_ISERIES ) ) {
HvCall1 ( HvCallBaseSetASR , stabreal ) ;
return ;
}
# endif /* CONFIG_PPC_ISERIES */
2005-11-29 23:04:17 +03:00
2005-11-10 05:37:51 +03:00
mtspr ( SPRN_ASR , stabreal ) ;
2005-04-17 02:20:36 +04:00
}