// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 *
 * Copyright (C) 2020 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"
static void v1_tlb_flush_all(void *cookie)
{
}

static void v1_tlb_flush_walk(unsigned long iova, size_t size,
                              size_t granule, void *cookie)
{
}

static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
                            unsigned long iova, size_t granule,
                            void *cookie)
{
}

static const struct iommu_flush_ops v1_flush_ops = {
        .tlb_flush_all  = v1_tlb_flush_all,
        .tlb_flush_walk = v1_tlb_flush_walk,
        .tlb_add_page   = v1_tlb_add_page,
};

/*
 * Helper function to get the first pte of a large mapping
 */
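/*
 * A "level 7" mapping is stored as a run of identical PTEs. Each PTE is
 * 8 bytes, so rounding the PTE address down with ~((count << 3) - 1)
 * yields the first entry of that run.
 */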
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
                         unsigned long *count)
{
        unsigned long pte_mask, pg_size, cnt;
        u64 *fpte;

        pg_size  = PTE_PAGE_SIZE(*pte);
        cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
        pte_mask = ~((cnt << 3) - 1);
        fpte     = (u64 *)(((unsigned long)pte) & pte_mask);

        if (page_size)
                *page_size = pg_size;

        if (count)
                *count = cnt;

        return fpte;
}

/****************************************************************************
 *
 * The functions below are used to create the page table mappings for
 * unity mapped regions.
 *
 ****************************************************************************/
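
/*
 * Page-table pages are not freed immediately; they are collected on a
 * freelist and released with put_pages_list() only after the IOMMU has
 * been flushed, so the hardware never walks freed memory.
 */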
static void free_pt_page(u64 *pt, struct list_head *freelist)
{
        struct page *p = virt_to_page(pt);

        list_add_tail(&p->lru, freelist);
}

static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
{
        u64 *p;
        int i;

        for (i = 0; i < 512; ++i) {
                /* PTE present? */
                if (!IOMMU_PTE_PRESENT(pt[i]))
                        continue;

                /* Large PTE? */
                if (PM_PTE_LEVEL(pt[i]) == 0 ||
                    PM_PTE_LEVEL(pt[i]) == 7)
                        continue;

                /*
                 * Free the next level. No need to look at l1 tables here since
                 * they can only contain leaf PTEs; just free them directly.
                 */
                p = IOMMU_PTE_PAGE(pt[i]);
                if (lvl > 2)
                        free_pt_lvl(p, freelist, lvl - 1);
                else
                        free_pt_page(p, freelist);
        }

        free_pt_page(pt, freelist);
}

static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
{
        switch (mode) {
        case PAGE_MODE_NONE:
        case PAGE_MODE_7_LEVEL:
                break;
        case PAGE_MODE_1_LEVEL:
                free_pt_page(root, freelist);
                break;
        case PAGE_MODE_2_LEVEL:
        case PAGE_MODE_3_LEVEL:
        case PAGE_MODE_4_LEVEL:
        case PAGE_MODE_5_LEVEL:
        case PAGE_MODE_6_LEVEL:
                free_pt_lvl(root, freelist, mode);
                break;
        default:
                BUG();
        }
}
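
/*
 * Publish a new page-table root and paging mode for @domain. The root is
 * page aligned, so its low three bits are free to carry the mode.
 */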
void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
                                  u64 *root, int mode)
{
        u64 pt_root;

        /* lowest 3 bits encode pgtable mode */
        pt_root = mode & 7;
        pt_root |= (u64)root;

        amd_iommu_domain_set_pt_root(domain, pt_root);
}

/*
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
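/*
 * Note: the page for the new root is allocated before domain->lock is
 * taken, since the allocation may sleep depending on @gfp while the lock
 * is a spinlock.
 */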
static bool increase_address_space(struct protection_domain *domain,
                                   unsigned long address,
                                   gfp_t gfp)
{
        unsigned long flags;
        bool ret = true;
        u64 *pte;

        pte = (void *)get_zeroed_page(gfp);
        if (!pte)
                return false;

        spin_lock_irqsave(&domain->lock, flags);

        if (address <= PM_LEVEL_SIZE(domain->iop.mode))
                goto out;

        ret = false;
        if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
                goto out;

        *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));

        domain->iop.root  = pte;
        domain->iop.mode += 1;

        amd_iommu_update_and_flush_device_table(domain);
        amd_iommu_domain_flush_complete(domain);

        /*
         * Device Table needs to be updated and flushed before the new root can
         * be published.
         */
        amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);

        pte = NULL;
        ret = true;

out:
        spin_unlock_irqrestore(&domain->lock, flags);
        free_page((unsigned long)pte);

        return ret;
}
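
/*
 * Walk the page table for @address down to the level that matches
 * @page_size, allocating intermediate levels as needed. Returns a pointer
 * to the target PTE or NULL on failure. *updated is set when existing
 * entries had to be torn down, telling the caller that a TLB flush is
 * required.
 */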
static u64 *alloc_pte(struct protection_domain *domain,
                      unsigned long address,
                      unsigned long page_size,
                      u64 **pte_page,
                      gfp_t gfp,
                      bool *updated)
{
        int level, end_lvl;
        u64 *pte, *page;

        BUG_ON(!is_power_of_2(page_size));

        while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
                /*
                 * Return an error if there is no memory to update the
                 * page-table.
                 */
                if (!increase_address_space(domain, address, gfp))
                        return NULL;
        }

        level   = domain->iop.mode - 1;
        pte     = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
        address = PAGE_SIZE_ALIGN(address, page_size);
        end_lvl = PAGE_SIZE_LEVEL(page_size);

        while (level > end_lvl) {
                u64 __pte, __npte;
                int pte_level;

                __pte     = *pte;
                pte_level = PM_PTE_LEVEL(__pte);

                /*
                 * If we replace a series of large PTEs, we need
                 * to tear down all of them.
                 */
                if (IOMMU_PTE_PRESENT(__pte) &&
                    pte_level == PAGE_MODE_7_LEVEL) {
                        unsigned long count, i;
                        u64 *lpte;

                        lpte = first_pte_l7(pte, NULL, &count);

                        /*
                         * Unmap the replicated PTEs that still match the
                         * original large mapping
                         */
                        for (i = 0; i < count; ++i)
                                cmpxchg64(&lpte[i], __pte, 0ULL);

                        *updated = true;
                        continue;
                }

                if (!IOMMU_PTE_PRESENT(__pte) ||
                    pte_level == PAGE_MODE_NONE) {
                        page = (u64 *)get_zeroed_page(gfp);

                        if (!page)
                                return NULL;

                        __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

                        /* pte could have been changed somewhere. */
                        if (!try_cmpxchg64(pte, &__pte, __npte))
                                free_page((unsigned long)page);
                        else if (IOMMU_PTE_PRESENT(__pte))
                                *updated = true;

                        continue;
                }

                /* No level skipping support yet */
                if (pte_level != level)
                        return NULL;

                level -= 1;

                pte = IOMMU_PTE_PAGE(__pte);

                if (pte_page && level == end_lvl)
                        *pte_page = pte;

                pte = &pte[PM_LEVEL_INDEX(level, address)];
        }

        return pte;
}

/*
 * This function checks if there is a PTE for a given dma address.
 * If there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
                      unsigned long address,
                      unsigned long *page_size)
{
        int level;
        u64 *pte;

        *page_size = 0;

        if (address > PM_LEVEL_SIZE(pgtable->mode))
                return NULL;

        level      = pgtable->mode - 1;
        pte        = &pgtable->root[PM_LEVEL_INDEX(level, address)];
        *page_size = PTE_LEVEL_PAGE_SIZE(level);

        while (level > 0) {
                /* Not Present */
                if (!IOMMU_PTE_PRESENT(*pte))
                        return NULL;

                /* Large PTE */
                if (PM_PTE_LEVEL(*pte) == 7 ||
                    PM_PTE_LEVEL(*pte) == 0)
                        break;

                /* No level skipping support yet */
                if (PM_PTE_LEVEL(*pte) != level)
                        return NULL;

                level -= 1;

                /* Walk to the next level */
                pte        = IOMMU_PTE_PAGE(*pte);
                pte        = &pte[PM_LEVEL_INDEX(level, address)];
                *page_size = PTE_LEVEL_PAGE_SIZE(level);
        }

        /*
         * If we have a series of large PTEs, make
         * sure to return a pointer to the first one.
         */
        if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
                pte = first_pte_l7(pte, page_size, NULL);

        return pte;
}
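
/*
 * Atomically clear a PTE. If it pointed to a lower-level table, queue
 * that sub-table on @freelist for deferred freeing.
 */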
static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
{
        u64 *pt;
        int mode;

        while (!try_cmpxchg64(pte, &pteval, 0))
                pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");

        if (!IOMMU_PTE_PRESENT(pteval))
                return;

        pt   = IOMMU_PTE_PAGE(pteval);
        mode = IOMMU_PTE_MODE(pteval);

        free_sub_pt(pt, mode, freelist);
}

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space. It allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
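/*
 * *mapped is advanced as each page is mapped, so on failure the caller
 * knows how much of the request was completed. Replaced page-table pages
 * are freed only after the TLB flush at the end of the function.
 */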
static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
                              int prot, gfp_t gfp, size_t *mapped)
{
        struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
        LIST_HEAD(freelist);
        bool updated = false;
        u64 __pte, *pte;
        int ret, i, count;

        BUG_ON(!IS_ALIGNED(iova, pgsize));
        BUG_ON(!IS_ALIGNED(paddr, pgsize));

        ret = -EINVAL;
        if (!(prot & IOMMU_PROT_MASK))
                goto out;

        while (pgcount > 0) {
                count = PAGE_SIZE_PTE_COUNT(pgsize);
                pte   = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated);

                ret = -ENOMEM;
                if (!pte)
                        goto out;

                for (i = 0; i < count; ++i)
                        free_clear_pte(&pte[i], pte[i], &freelist);

                if (!list_empty(&freelist))
                        updated = true;

                if (count > 1) {
                        __pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
                        __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
                } else
                        __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;

                if (prot & IOMMU_PROT_IR)
                        __pte |= IOMMU_PTE_IR;
                if (prot & IOMMU_PROT_IW)
                        __pte |= IOMMU_PTE_IW;

                for (i = 0; i < count; ++i)
                        pte[i] = __pte;

                iova  += pgsize;
                paddr += pgsize;
                pgcount--;
                if (mapped)
                        *mapped += pgsize;
        }

        ret = 0;

out:
        if (updated) {
                unsigned long flags;

                spin_lock_irqsave(&dom->lock, flags);
                /*
                 * Flush domain TLB(s) and wait for completion. Any Device-Table
                 * Updates and flushing already happened in
                 * increase_address_space().
                 */
                amd_iommu_domain_flush_tlb_pde(dom);
                amd_iommu_domain_flush_complete(dom);
                spin_unlock_irqrestore(&dom->lock, flags);
        }

        /* Everything flushed out, free pages now */
        put_pages_list(&freelist);

        return ret;
}
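
/*
 * Clear the PTEs covering [iova, iova + pgcount * pgsize). Only the leaf
 * entries are zeroed; intermediate page-table pages stay allocated.
 * Returns the number of bytes actually unmapped.
 */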
static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
                                          unsigned long iova,
                                          size_t pgsize, size_t pgcount,
                                          struct iommu_iotlb_gather *gather)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long long unmapped;
        unsigned long unmap_size;
        u64 *pte;
        size_t size = pgcount << __ffs(pgsize);

        BUG_ON(!is_power_of_2(pgsize));

        unmapped = 0;

        while (unmapped < size) {
                pte = fetch_pte(pgtable, iova, &unmap_size);
                if (pte) {
                        int i, count;

                        count = PAGE_SIZE_PTE_COUNT(unmap_size);
                        for (i = 0; i < count; i++)
                                pte[i] = 0ULL;
                } else {
                        return unmapped;
                }

                iova = (iova & ~(unmap_size - 1)) + unmap_size;
                unmapped += unmap_size;
        }

        return unmapped;
}
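
/*
 * Translate an IOVA to the physical address it is mapped to, or return 0
 * if no mapping exists.
 */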
static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long offset_mask, pte_pgsize;
        u64 *pte, __pte;

        pte = fetch_pte(pgtable, iova, &pte_pgsize);
        if (!pte || !IOMMU_PTE_PRESENT(*pte))
                return 0;

        offset_mask = pte_pgsize - 1;
        __pte       = __sme_clr(*pte & PM_ADDR_MASK);

        return (__pte & ~offset_mask) | (iova & offset_mask);
}

/*
 * ----------------------------------------------------
 */
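
/*
 * Tear down a v1 page table: detach the root from the domain, make that
 * change visible to the IOMMUs, and only then release the page-table
 * pages.
 */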
static void v1_free_pgtable(struct io_pgtable *iop)
{
        struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
        struct protection_domain *dom;
        LIST_HEAD(freelist);

        if (pgtable->mode == PAGE_MODE_NONE)
                return;

        dom = container_of(pgtable, struct protection_domain, iop);

        /* Page-table is not visible to IOMMU anymore, so free it */
        BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
               pgtable->mode > PAGE_MODE_6_LEVEL);

        free_sub_pt(pgtable->root, pgtable->mode, &freelist);

        /* Update data structure */
        amd_iommu_domain_clr_pt_root(dom);

        /* Make changes visible to IOMMUs */
        amd_iommu_domain_update(dom);

        put_pages_list(&freelist);
}
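
/*
 * Set up the limits advertised through @cfg and hook up the v1
 * map/unmap/iova_to_phys operations.
 */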
static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
        struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

        cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES;
        cfg->ias           = IOMMU_IN_ADDR_BIT_SIZE;
        cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE;
        cfg->tlb           = &v1_flush_ops;

        pgtable->iop.ops.map_pages    = iommu_v1_map_pages;
        pgtable->iop.ops.unmap_pages  = iommu_v1_unmap_pages;
        pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;

        return &pgtable->iop;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
        .alloc = v1_alloc_pgtable,
        .free  = v1_free_pgtable,
};