/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
*/
#include <linux/string.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/fixmap.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <hv/hypervisor.h>
#include <arch/chip.h>

#if !CHIP_HAS_COHERENT_LOCAL_CACHE()

/* Defined in memcpy.S */
extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
extern unsigned long __copy_to_user_inatomic_asm(
	void __user *to, const void *from, unsigned long n);
extern unsigned long __copy_from_user_inatomic_asm(
	void *to, const void __user *from, unsigned long n);
extern unsigned long __copy_from_user_zeroing_asm(
	void *to, const void __user *from, unsigned long n);
typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
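
/*
 * Each entry point at the bottom of this file passes its matching asm
 * routine above as the memcpy_t fallback; fast_copy() uses it both for
 * the bail-out cases and for whatever tail the TLB trick leaves over.
 */
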
/* Size above which to consider TLB games for performance */
#define LARGE_COPY_CUTOFF 2048

/* Communicate to the simulator what we are trying to do. */
#define sim_allow_multiple_caching(b) \
	__insn_mtspr(SPR_SIM_CONTROL, \
		     SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))

/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache. (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
*/
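/*
 * In outline: (1) map the destination through a private per-cpu kmap
 * slot; (2) map the source through a second slot, non-coherent and
 * read-only; (3) copy; (4) remap the source locally-cached and
 * writable so it can be invalidated without touching the remote cpu's
 * cache; (5) invalidate, then pop both slots and restore state.
 */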
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
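	/*
	 * get_cpu() disables preemption, so the per-cpu kmap slots set up
	 * below remain ours until the matching put_cpu() at the end.
	 */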
	int cpu = get_cpu();

	/*
	 * Disable interrupts so that we don't recurse into memcpy()
	 * in an interrupt handler, nor accidentally reference
	 * the PA of the source from an interrupt routine. Also
	 * notify the simulator that we're playing games so we don't
	 * generate spurious coherency warnings.
	 */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Set up the new dest mapping */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
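	/*
	 * The slot may still hold this PTE from an earlier call on this
	 * cpu; only rewrite it (and pay for a TLB flush) when it differs.
	 */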
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/* Set up the new source mapping */
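	/*
	 * The source gets its own kmap slot, mapped non-coherent (nc) so
	 * we can read the remotely-homed line into our local cache, and
	 * read-only to guard against stray writes through that mapping.
	 */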
	type1 = kmap_atomic_idx_push();
	idx += (type0 - type1);
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Actually move the data. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Remap the source as locally-cached and not OLOC'ed so that
	 * we can inval without also invaling the remote cpu's cache.
	 * This also avoids known errata with inv'ing cacheable oloc data.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
	__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
	 * Do the actual invalidation, covering the full L2 cache line
	 * at the end since __memcpy_asm() is somewhat aggressive.
	 */
	__inv_buffer((void *)newsrc, len);

	/*
	 * We're done: notify the simulator that all is back to normal,
	 * and re-enable interrupts and pre-emption.
	 */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}

/*
 * Identify large copies from remotely-cached memory, and copy them
 * via memcpy_multicache() if they look good, otherwise fall back
 * to the particular kind of copying passed as the memcpy_t function.
 */
static unsigned long fast_copy(void *dest, const void *source, int len,
			       memcpy_t func)
{
	/*
	 * Check if it's big enough to bother with. We may end up doing a
	 * small copy via TLB manipulation if we're near a page boundary,
	 * but presumably we'll make it up when we hit the second page.
	 */
	while (len >= LARGE_COPY_CUTOFF) {
		int copy_size, bytes_left_on_page;
		pte_t *src_ptep, *dst_ptep;
		pte_t src_pte, dst_pte;
		struct page *src_page, *dst_page;

		/* Is the source page oloc'ed to a remote cpu? */
retry_source:
		src_ptep = virt_to_pte(current->mm, (unsigned long)source);
		if (src_ptep == NULL)
			break;
		src_pte = *src_ptep;
		if (!hv_pte_get_present(src_pte) ||
		    !hv_pte_get_readable(src_pte) ||
		    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
			break;
		if (get_remote_cache_cpu(src_pte) == smp_processor_id())
			break;
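
		/*
		 * Pin the page, then re-read the PTE; if it changed while
		 * we were taking the reference, drop it and start over.
		 */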
		src_page = pfn_to_page(pte_pfn(src_pte));
		get_page(src_page);
		if (pte_val(src_pte) != pte_val(*src_ptep)) {
			put_page(src_page);
			goto retry_source;
		}
		if (pte_huge(src_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = pte_pfn(src_pte);
			pfn += (((unsigned long)source & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			src_pte = pfn_pte(pfn, src_pte);
			src_pte = pte_mksmall(src_pte);
		}
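
		/*
		 * The small-page adjustment above matters because the kmap
		 * window in memcpy_multicache() maps only a single page;
		 * the PTE must name the exact PAGE_SIZE frame containing
		 * the source address.
		 */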

		/* Is the destination page writable? */
retry_dest:
		dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
		if (dst_ptep == NULL) {
			put_page(src_page);
			break;
		}
		dst_pte = *dst_ptep;
		if (!hv_pte_get_present(dst_pte) ||
		    !hv_pte_get_writable(dst_pte)) {
			put_page(src_page);
			break;
		}
		dst_page = pfn_to_page(pte_pfn(dst_pte));
		if (dst_page == src_page) {
			/*
			 * Source and dest are on the same page; this
			 * potentially exposes us to incoherence if any
			 * part of src and dest overlap on a cache line.
			 * Just give up rather than trying to be precise.
			 */
			put_page(src_page);
			break;
		}
		get_page(dst_page);
		if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
			put_page(dst_page);
			goto retry_dest;
		}
		if (pte_huge(dst_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = pte_pfn(dst_pte);
			pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			dst_pte = pfn_pte(pfn, dst_pte);
			dst_pte = pte_mksmall(dst_pte);
		}

		/* All looks good: create a cachable PTE and copy from it */
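		/*
		 * Clamp copy_size so each chunk stays within one source
		 * page and one dest page; memcpy_multicache() maps exactly
		 * one page of each, and the loop advances for the rest.
		 */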
		copy_size = len;
		bytes_left_on_page =
			PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		bytes_left_on_page =
			PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);

		/* Release the pages */
		put_page(dst_page);
		put_page(src_page);

		/* Continue on the next page */
		dest += copy_size;
		source += copy_size;
		len -= copy_size;
	}

	return func(dest, source, len);
}
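
/*
 * Entry points. Short copies skip the page-table games entirely;
 * anything fast_copy() declines, plus whatever tail it leaves, goes
 * through the corresponding asm routine, which for the user-copy
 * variants is presumably also where fault fixup lives.
 */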
void *memcpy(void *to, const void *from, __kernel_size_t n)
{
	if (n < LARGE_COPY_CUTOFF)
		return (void *)__memcpy_asm(to, from, n);
	else
		return (void *)fast_copy(to, from, n, __memcpy_asm);
}

unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
				      unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_to_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
}

unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
					unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
}

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_zeroing_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
}

#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */