// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 *
 * Copyright (C) 2020 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"

static void v1_tlb_flush_all(void *cookie)
{
}

static void v1_tlb_flush_walk(unsigned long iova, size_t size,
			      size_t granule, void *cookie)
{
}

static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
			    unsigned long iova, size_t granule,
			    void *cookie)
{
}

static const struct iommu_flush_ops v1_flush_ops = {
	.tlb_flush_all	= v1_tlb_flush_all,
	.tlb_flush_walk	= v1_tlb_flush_walk,
	.tlb_add_page	= v1_tlb_add_page,
};

/*
 * Helper function to get the first pte of a large mapping
 */
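/*
 * Example: a 64 KiB large mapping is stored as 16 replicated PTEs
 * (16 * 8 bytes = 128 bytes), so pte_mask below evaluates to ~0x7fUL and
 * the returned pointer is the input PTE pointer rounded down to the
 * 128-byte-aligned start of that replicated series.
 */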
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
			 unsigned long *count)
{
	unsigned long pte_mask, pg_size, cnt;
	u64 *fpte;

	pg_size = PTE_PAGE_SIZE(*pte);
	cnt     = PAGE_SIZE_PTE_COUNT(pg_size);
	pte_mask = ~((cnt << 3) - 1);
	fpte     = (u64 *)(((unsigned long)pte) & pte_mask);

	if (page_size)
		*page_size = pg_size;

	if (count)
		*count = cnt;

	return fpte;
}

/****************************************************************************
 *
 * The functions below are used to create the page table mappings for
 * unity mapped regions.
 *
 ****************************************************************************/

static void free_page_list(struct page *freelist)
{
	while (freelist != NULL) {
		unsigned long p = (unsigned long)page_address(freelist);

		freelist = freelist->freelist;
		free_page(p);
	}
}

static struct page *free_pt_page(unsigned long pt, struct page *freelist)
{
	struct page *p = virt_to_page((void *)pt);

	p->freelist = freelist;

	return p;
}
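
/*
 * Note: free_pt_page() does not release the page immediately; it chains the
 * page onto the freelist via the struct page 'freelist' pointer, so callers
 * can defer free_page_list() until the IOTLB has been flushed and no IOMMU
 * can still walk the stale tables.
 */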

#define DEFINE_FREE_PT_FN(LVL, FN)						\
static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
{										\
	unsigned long p;							\
	u64 *pt;								\
	int i;									\
										\
	pt = (u64 *)__pt;							\
										\
	for (i = 0; i < 512; ++i) {						\
		/* PTE present? */						\
		if (!IOMMU_PTE_PRESENT(pt[i]))					\
			continue;						\
										\
		/* Large PTE? */						\
		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
		    PM_PTE_LEVEL(pt[i]) == 7)					\
			continue;						\
										\
		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
		freelist = FN(p, freelist);					\
	}									\
										\
	return free_pt_page((unsigned long)pt, freelist);			\
}

DEFINE_FREE_PT_FN(l2, free_pt_page)
DEFINE_FREE_PT_FN(l3, free_pt_l2)
DEFINE_FREE_PT_FN(l4, free_pt_l3)
DEFINE_FREE_PT_FN(l5, free_pt_l4)
DEFINE_FREE_PT_FN(l6, free_pt_l5)
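
/*
 * The generated helpers each free a complete sub-tree: free_pt_l2() releases
 * a level-2 table together with every level-1 table it references,
 * free_pt_l3() recurses one level deeper, and so on up to free_pt_l6().
 */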

static struct page *free_sub_pt(unsigned long root, int mode,
				struct page *freelist)
{
	switch (mode) {
	case PAGE_MODE_NONE:
	case PAGE_MODE_7_LEVEL:
		break;
	case PAGE_MODE_1_LEVEL:
		freelist = free_pt_page(root, freelist);
		break;
	case PAGE_MODE_2_LEVEL:
		freelist = free_pt_l2(root, freelist);
		break;
	case PAGE_MODE_3_LEVEL:
		freelist = free_pt_l3(root, freelist);
		break;
	case PAGE_MODE_4_LEVEL:
		freelist = free_pt_l4(root, freelist);
		break;
	case PAGE_MODE_5_LEVEL:
		freelist = free_pt_l5(root, freelist);
		break;
	case PAGE_MODE_6_LEVEL:
		freelist = free_pt_l6(root, freelist);
		break;
	default:
		BUG();
	}

	return freelist;
}

void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
				  u64 *root, int mode)
{
	u64 pt_root;

	/* lowest 3 bits encode pgtable mode */
	pt_root = mode & 7;
	pt_root |= (u64)root;

	amd_iommu_domain_set_pt_root(domain, pt_root);
}
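
/*
 * Packing the mode into the pointer works because page-table roots are page
 * aligned, so bits 2:0 of the root address are always zero. For a 3-level
 * table, for example, pt_root is the root's virtual address with
 * PAGE_MODE_3_LEVEL (0x3) folded into those low bits.
 */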

/*
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
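/*
 * For example, growing a 3-level table (39-bit IOVA space, 512 GiB) into a
 * 4-level table extends the addressable range to 48 bits (256 TiB); the old
 * root becomes the first entry of the freshly allocated new root.
 */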
static bool increase_address_space(struct protection_domain *domain,
				   unsigned long address,
				   gfp_t gfp)
{
	unsigned long flags;
	bool ret = true;
	u64 *pte;

	pte = (void *)get_zeroed_page(gfp);
	if (!pte)
		return false;

	spin_lock_irqsave(&domain->lock, flags);

	if (address <= PM_LEVEL_SIZE(domain->iop.mode))
		goto out;

	ret = false;
	if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
		goto out;

	*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));

	domain->iop.root  = pte;
	domain->iop.mode += 1;
	amd_iommu_update_and_flush_device_table(domain);
	amd_iommu_domain_flush_complete(domain);

	/*
	 * Device Table needs to be updated and flushed before the new root can
	 * be published.
	 */
	amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);

	pte = NULL;
	ret = true;

out:
	spin_unlock_irqrestore(&domain->lock, flags);
	free_page((unsigned long)pte);

	return ret;
}

static u64 *alloc_pte(struct protection_domain *domain,
		      unsigned long address,
		      unsigned long page_size,
		      u64 **pte_page,
		      gfp_t gfp,
		      bool *updated)
{
	int level, end_lvl;
	u64 *pte, *page;

	BUG_ON(!is_power_of_2(page_size));

	while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
		/*
		 * Return an error if there is no memory to update the
		 * page-table.
		 */
		if (!increase_address_space(domain, address, gfp))
			return NULL;
	}

	level   = domain->iop.mode - 1;
	pte     = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
	address = PAGE_SIZE_ALIGN(address, page_size);
	end_lvl = PAGE_SIZE_LEVEL(page_size);

	while (level > end_lvl) {
		u64 __pte, __npte;
		int pte_level;

		__pte     = *pte;
		pte_level = PM_PTE_LEVEL(__pte);

		/*
		 * If we replace a series of large PTEs, we need
		 * to tear down all of them.
		 */
		if (IOMMU_PTE_PRESENT(__pte) &&
		    pte_level == PAGE_MODE_7_LEVEL) {
			unsigned long count, i;
			u64 *lpte;

			lpte = first_pte_l7(pte, NULL, &count);

			/*
			 * Unmap the replicated PTEs that still match the
			 * original large mapping
			 */
			for (i = 0; i < count; ++i)
				cmpxchg64(&lpte[i], __pte, 0ULL);

			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte) ||
		    pte_level == PAGE_MODE_NONE) {
			page = (u64 *)get_zeroed_page(gfp);

			if (!page)
				return NULL;

			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

			/* pte could have been changed somewhere. */
			if (cmpxchg64(pte, __pte, __npte) != __pte)
				free_page((unsigned long)page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

			continue;
		}

		/* No level skipping support yet */
		if (pte_level != level)
			return NULL;

		level -= 1;

		pte = IOMMU_PTE_PAGE(__pte);

		if (pte_page && level == end_lvl)
			*pte_page = pte;

		pte = &pte[PM_LEVEL_INDEX(level, address)];
	}

	return pte;
}

/*
 * This function checks if there is a PTE for a given dma address. If
 * there is one, it returns the pointer to it.
 */
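/*
 * For example, for an IOVA inside a naturally aligned 2 MiB mapping the walk
 * stops at the level-1 entry (its next-level field is 0), *page_size is set
 * to 2 MiB and a pointer to that PTE is returned.
 */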
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long *page_size)
{
	int level;
	u64 *pte;

	*page_size = 0;

	if (address > PM_LEVEL_SIZE(pgtable->mode))
		return NULL;

	level	   =  pgtable->mode - 1;
	pte	   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	*page_size =  PTE_LEVEL_PAGE_SIZE(level);

	while (level > 0) {

		/* Not Present */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Large PTE */
		if (PM_PTE_LEVEL(*pte) == 7 ||
		    PM_PTE_LEVEL(*pte) == 0)
			break;

		/* No level skipping support yet */
		if (PM_PTE_LEVEL(*pte) != level)
			return NULL;

		level -= 1;

		/* Walk to the next level */
		pte	   = IOMMU_PTE_PAGE(*pte);
		pte	   = &pte[PM_LEVEL_INDEX(level, address)];
		*page_size = PTE_LEVEL_PAGE_SIZE(level);
	}

	/*
	 * If we have a series of large PTEs, make
	 * sure to return a pointer to the first one.
	 */
	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
		pte = first_pte_l7(pte, page_size, NULL);

	return pte;
}

static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
{
	unsigned long pt;
	int mode;

	while (cmpxchg64(pte, pteval, 0) != pteval) {
		pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
		pteval = *pte;
	}

	if (!IOMMU_PTE_PRESENT(pteval))
		return freelist;

	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);

	return free_sub_pt(pt, mode, freelist);
}

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space. It allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
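/*
 * Illustrative usage sketch (ret, iova and paddr are placeholders): a caller
 * that obtained these ops through the io-pgtable framework could map a
 * 2 MiB region with
 *
 *	ret = ops->map(ops, iova, paddr, SZ_2M,
 *		       IOMMU_PROT_IR | IOMMU_PROT_IW, GFP_KERNEL);
 *
 * where iova and paddr must both be aligned to the mapping size (see the
 * BUG_ONs below).
 */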
static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
			     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
	struct page *freelist = NULL;
	bool updated = false;
	u64 __pte, *pte;
	int ret, i, count;

	BUG_ON(!IS_ALIGNED(iova, size));
	BUG_ON(!IS_ALIGNED(paddr, size));

	ret = -EINVAL;
	if (!(prot & IOMMU_PROT_MASK))
		goto out;

	count = PAGE_SIZE_PTE_COUNT(size);
	pte   = alloc_pte(dom, iova, size, NULL, gfp, &updated);

	ret = -ENOMEM;
	if (!pte)
		goto out;

	for (i = 0; i < count; ++i)
		freelist = free_clear_pte(&pte[i], pte[i], freelist);

	if (freelist != NULL)
		updated = true;

	if (count > 1) {
		__pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
		__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
	} else
		__pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;

	if (prot & IOMMU_PROT_IR)
		__pte |= IOMMU_PTE_IR;
	if (prot & IOMMU_PROT_IW)
		__pte |= IOMMU_PTE_IW;

	for (i = 0; i < count; ++i)
		pte[i] = __pte;

	ret = 0;

out:
	if (updated) {
		unsigned long flags;

		spin_lock_irqsave(&dom->lock, flags);
		/*
		 * Flush domain TLB(s) and wait for completion. Any Device-Table
		 * Updates and flushing already happened in
		 * increase_address_space().
		 */
		amd_iommu_domain_flush_tlb_pde(dom);
		amd_iommu_domain_flush_complete(dom);
		spin_unlock_irqrestore(&dom->lock, flags);
	}

	/* Everything flushed out, free pages now */
	free_page_list(freelist);

	return ret;
}

static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
					 unsigned long iova,
					 size_t size,
					 struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long long unmapped;
	unsigned long unmap_size;
	u64 *pte;

	BUG_ON(!is_power_of_2(size));

	unmapped = 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (pte) {
			int i, count;

			count = PAGE_SIZE_PTE_COUNT(unmap_size);
			for (i = 0; i < count; i++)
				pte[i] = 0ULL;
		}

		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	BUG_ON(unmapped && !is_power_of_2(unmapped));

	return unmapped;
}
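
/*
 * Note that the unmap granularity follows what is actually mapped: a 2 MiB
 * range populated with 512 individual 4 KiB mappings is torn down in 512
 * iterations of the loop above, while a single 2 MiB mapping is cleared in
 * one pass.
 */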

static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);

	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

	offset_mask = pte_pgsize - 1;
	__pte	    = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}
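
/*
 * Example: for an IOVA resolved by a 2 MiB mapping, pte_pgsize is 2 MiB and
 * offset_mask is 0x1fffff, so the result combines the physical base from the
 * PTE with the low 21 bits of the IOVA.
 */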

/*
 * ----------------------------------------------------
 */
static void v1_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
	struct protection_domain *dom;
	struct page *freelist = NULL;
	unsigned long root;

	if (pgtable->mode == PAGE_MODE_NONE)
		return;

	dom = container_of(pgtable, struct protection_domain, iop);

	/* Update data structure */
	amd_iommu_domain_clr_pt_root(dom);

	/* Make changes visible to IOMMUs */
	amd_iommu_domain_update(dom);

	/* Page-table is not visible to IOMMU anymore, so free it */
	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
	       pgtable->mode > PAGE_MODE_6_LEVEL);

	root = (unsigned long)pgtable->root;
	freelist = free_sub_pt(root, pgtable->mode, freelist);

	free_page_list(freelist);
}

static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

	cfg->pgsize_bitmap  = AMD_IOMMU_PGSIZES;
	cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;
	cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE;
	cfg->tlb            = &v1_flush_ops;

	pgtable->iop.ops.map          = iommu_v1_map_page;
	pgtable->iop.ops.unmap        = iommu_v1_unmap_page;
	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;

	return &pgtable->iop;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
	.alloc	= v1_alloc_pgtable,
	.free	= v1_free_pgtable,
};
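
/*
 * These init_fns hook the v1 format into the generic io-pgtable layer:
 * alloc_io_pgtable_ops() dispatches to v1_alloc_pgtable() for this format and
 * hands the resulting map/unmap/iova_to_phys ops to the IOMMU driver, while
 * free_io_pgtable_ops() ends up in v1_free_pgtable().
 */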