2016-05-27 08:48:59 +03:00
/*
* Copyright 2016 , Rashmica Gupta , IBM Corp .
*
* This traverses the kernel pagetables and dumps the
* information about the used sections of memory to
* /sys/kernel/debug/kernel_page_tables.
*
* Derived from the arm64 implementation :
* Copyright ( c ) 2014 , The Linux Foundation , Laura Abbott .
* ( C ) Copyright 2008 Intel Corporation , Arjan van de Ven .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; version 2
* of the License .
*/
# include <linux/debugfs.h>
# include <linux/fs.h>
2017-05-16 13:42:53 +03:00
# include <linux/hugetlb.h>
2016-05-27 08:48:59 +03:00
# include <linux/io.h>
# include <linux/mm.h>
# include <linux/sched.h>
# include <linux/seq_file.h>
# include <asm/fixmap.h>
# include <asm/pgtable.h>
# include <linux/const.h>
# include <asm/page.h>
# include <asm/pgalloc.h>
2017-04-18 09:20:13 +03:00
/*
 * On PPC32 builds define KERN_VIRT_START as 0, so that the page-table walk
 * in walk_pagetables() and the initial pg_state in ptdump_show() both begin
 * at virtual address 0.
 */
# ifdef CONFIG_PPC32
# define KERN_VIRT_START 0
# endif
2016-05-27 08:48:59 +03:00
/*
* To visualise what is happening ,
*
* - PTRS_PER_P** = how many entries there are in the corresponding P**
* - P**_SHIFT = how many bits of the address we use to index into the
* corresponding P**
* - P**_SIZE is how much memory we can access through the table - not the
* size of the table itself.
* P** = {PGD, PUD, PMD, PTE}
*
*
* Each entry of the PGD points to a PUD . Each entry of a PUD points to a
* PMD . Each entry of a PMD points to a PTE . And every PTE entry points to
* a page .
*
* In the case where there are only 3 levels , the PUD is folded into the
* PGD : every PUD has only one entry which points to the PMD .
*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
* iterating over the PTE entries. When the continuity is broken it then
* dumps out a description of the range - i.e. PTEs that are virtually
* contiguous with the same PTE flags are chunked together. This is to make
* it clear how different areas of the kernel virtual memory are used.
*
*/
struct pg_state {
struct seq_file * seq ;
const struct addr_marker * marker ;
unsigned long start_address ;
2017-03-31 04:37:49 +03:00
unsigned long start_pa ;
unsigned long last_pa ;
2016-05-27 08:48:59 +03:00
unsigned int level ;
u64 current_flags ;
} ;
/*
 * Labels a region of kernel virtual memory: the region called @name begins
 * at @start_address. note_page() prints the name when the walk enters it.
 */
struct addr_marker {
	unsigned long start_address;
	const char *name;
};
static struct addr_marker address_markers [ ] = {
{ 0 , " Start of kernel VM " } ,
{ 0 , " vmalloc() Area " } ,
{ 0 , " vmalloc() End " } ,
2017-04-18 09:20:13 +03:00
# ifdef CONFIG_PPC64
2016-05-27 08:48:59 +03:00
{ 0 , " isa I/O start " } ,
{ 0 , " isa I/O end " } ,
{ 0 , " phb I/O start " } ,
{ 0 , " phb I/O end " } ,
{ 0 , " I/O remap start " } ,
{ 0 , " I/O remap end " } ,
{ 0 , " vmemmap start " } ,
2017-04-18 09:20:13 +03:00
# else
{ 0 , " Early I/O remap start " } ,
{ 0 , " Early I/O remap end " } ,
# ifdef CONFIG_NOT_COHERENT_CACHE
{ 0 , " Consistent mem start " } ,
{ 0 , " Consistent mem end " } ,
# endif
# ifdef CONFIG_HIGHMEM
{ 0 , " Highmem PTEs start " } ,
{ 0 , " Highmem PTEs end " } ,
# endif
{ 0 , " Fixmap start " } ,
{ 0 , " Fixmap end " } ,
# endif
2016-05-27 08:48:59 +03:00
{ - 1 , NULL } ,
} ;
/*
 * Describes how dump_flag_info() decodes one group of PTE bits.
 */
struct flag_info {
	/* bits of the PTE that belong to this flag; 0 means "skip this entry" */
	u64 mask;
	/* value the masked bits must equal for the flag to count as set */
	u64 val;
	/* text printed when set (also the label when is_val is true) */
	const char *set;
	/* text printed when not set; NULL prints nothing */
	const char *clear;
	/* true: print the bits as a number instead of a set/clear string */
	bool is_val;
	/* right shift applied to the bits before printing when is_val is true */
	int shift;
};
static const struct flag_info flag_array [ ] = {
{
2018-01-12 15:45:27 +03:00
. mask = _PAGE_USER | _PAGE_PRIVILEGED ,
2016-05-27 08:48:59 +03:00
. val = _PAGE_USER ,
. set = " user " ,
. clear = " " ,
} , {
2018-01-12 15:45:29 +03:00
. mask = _PAGE_RW | _PAGE_RO | _PAGE_NA ,
2016-05-27 08:48:59 +03:00
. val = _PAGE_RW ,
. set = " rw " ,
} , {
2018-01-12 15:45:29 +03:00
. mask = _PAGE_RW | _PAGE_RO | _PAGE_NA ,
. val = _PAGE_RO ,
. set = " ro " ,
} , {
# if _PAGE_NA != 0
. mask = _PAGE_RW | _PAGE_RO | _PAGE_NA ,
. val = _PAGE_RO ,
. set = " na " ,
} , {
# endif
2016-05-27 08:48:59 +03:00
. mask = _PAGE_EXEC ,
. val = _PAGE_EXEC ,
. set = " X " ,
. clear = " " ,
} , {
. mask = _PAGE_PTE ,
. val = _PAGE_PTE ,
. set = " pte " ,
. clear = " " ,
} , {
. mask = _PAGE_PRESENT ,
. val = _PAGE_PRESENT ,
. set = " present " ,
. clear = " " ,
} , {
2017-10-19 07:08:43 +03:00
# ifdef CONFIG_PPC_BOOK3S_64
2016-05-27 08:48:59 +03:00
. mask = H_PAGE_HASHPTE ,
. val = H_PAGE_HASHPTE ,
# else
. mask = _PAGE_HASHPTE ,
. val = _PAGE_HASHPTE ,
# endif
. set = " hpte " ,
. clear = " " ,
} , {
2017-10-19 07:08:43 +03:00
# ifndef CONFIG_PPC_BOOK3S_64
2016-05-27 08:48:59 +03:00
. mask = _PAGE_GUARDED ,
. val = _PAGE_GUARDED ,
. set = " guarded " ,
. clear = " " ,
} , {
# endif
. mask = _PAGE_DIRTY ,
. val = _PAGE_DIRTY ,
. set = " dirty " ,
. clear = " " ,
} , {
. mask = _PAGE_ACCESSED ,
. val = _PAGE_ACCESSED ,
. set = " accessed " ,
. clear = " " ,
} , {
2017-10-19 07:08:43 +03:00
# ifndef CONFIG_PPC_BOOK3S_64
2016-05-27 08:48:59 +03:00
. mask = _PAGE_WRITETHRU ,
. val = _PAGE_WRITETHRU ,
. set = " write through " ,
. clear = " " ,
} , {
# endif
2017-03-31 04:37:48 +03:00
# ifndef CONFIG_PPC_BOOK3S_64
2016-05-27 08:48:59 +03:00
. mask = _PAGE_NO_CACHE ,
. val = _PAGE_NO_CACHE ,
. set = " no cache " ,
. clear = " " ,
} , {
2017-03-31 04:37:48 +03:00
# else
. mask = _PAGE_NON_IDEMPOTENT ,
. val = _PAGE_NON_IDEMPOTENT ,
. set = " non-idempotent " ,
. clear = " " ,
} , {
. mask = _PAGE_TOLERANT ,
. val = _PAGE_TOLERANT ,
. set = " tolerant " ,
. clear = " " ,
} , {
# endif
2016-11-30 11:41:02 +03:00
# ifdef CONFIG_PPC_BOOK3S_64
2016-05-27 08:48:59 +03:00
. mask = H_PAGE_BUSY ,
. val = H_PAGE_BUSY ,
. set = " busy " ,
} , {
# ifdef CONFIG_PPC_64K_PAGES
. mask = H_PAGE_COMBO ,
. val = H_PAGE_COMBO ,
. set = " combo " ,
} , {
. mask = H_PAGE_4K_PFN ,
. val = H_PAGE_4K_PFN ,
. set = " 4K_pfn " ,
} , {
2017-11-06 11:50:52 +03:00
# else /* CONFIG_PPC_64K_PAGES */
2016-05-27 08:48:59 +03:00
. mask = H_PAGE_F_GIX ,
. val = H_PAGE_F_GIX ,
. set = " f_gix " ,
. is_val = true ,
. shift = H_PAGE_F_GIX_SHIFT ,
} , {
. mask = H_PAGE_F_SECOND ,
. val = H_PAGE_F_SECOND ,
. set = " f_second " ,
} , {
2017-11-06 11:50:52 +03:00
# endif /* CONFIG_PPC_64K_PAGES */
2016-11-30 11:41:02 +03:00
# endif
2016-05-27 08:48:59 +03:00
. mask = _PAGE_SPECIAL ,
. val = _PAGE_SPECIAL ,
. set = " special " ,
}
} ;
/*
 * Per-level decoding information used by note_page().
 */
struct pgtable_level {
	/* flag decoders for entries at this level; NULL if there are none */
	const struct flag_info *flag;
	/* number of elements in @flag */
	size_t num;
	/* OR of every decoder mask; filled in by build_pgtable_complete_mask() */
	u64 mask;
};
/*
 * Indexed by the level number handed to note_page(). Slot 0 has no flag
 * table (it is left zero-initialised); every real level shares flag_array.
 * The .mask members start out as zero and are computed at init time by
 * build_pgtable_complete_mask().
 */
static struct pgtable_level pg_level[] = {
	[1] = {	/* pgd */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	},
	[2] = {	/* pud */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	},
	[3] = {	/* pmd */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	},
	[4] = {	/* pte */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	},
};
/*
 * Print the decoded flags of one range.
 *
 * @st:   dump state; output goes to st->seq, and every bit that a decoder
 *        covers is cleared from st->current_flags as it is handled
 * @flag: first element of the decoder table
 * @pte:  flag bits to decode
 * @num:  number of elements in @flag
 *
 * Whatever is still set in st->current_flags once all decoders have run is
 * reported as "unknown flags".
 */
static void dump_flag_info(struct pg_state *st, const struct flag_info
		*flag, u64 pte, int num)
{
	unsigned int idx;

	for (idx = 0; idx < num; idx++) {
		const struct flag_info *fi = &flag[idx];

		/* a zero mask means the flag is not defined: nothing to check */
		if (!fi->mask)
			continue;

		if (!fi->is_val) {
			const char *label;

			label = ((pte & fi->mask) == fi->val) ? fi->set : fi->clear;
			if (label)
				seq_printf(st->seq, " %s ", label);
		} else {
			/* this "flag" is really a multi-bit value: print it as a number */
			u64 field = pte & fi->val;

			if (fi->shift)
				field >>= fi->shift;
			seq_printf(st->seq, " %s:%llx ", fi->set, field);
		}

		st->current_flags &= ~fi->mask;
	}

	if (st->current_flags)
		seq_printf(st->seq, " unknown flags:%llx ", st->current_flags);
}
static void dump_addr ( struct pg_state * st , unsigned long addr )
{
static const char units [ ] = " KMGTPE " ;
const char * unit = units ;
unsigned long delta ;
2017-04-13 15:41:40 +03:00
# ifdef CONFIG_PPC64
2017-03-31 04:37:49 +03:00
seq_printf ( st - > seq , " 0x%016lx-0x%016lx " , st - > start_address , addr - 1 ) ;
seq_printf ( st - > seq , " 0x%016lx " , st - > start_pa ) ;
2017-04-13 15:41:40 +03:00
# else
seq_printf ( st - > seq , " 0x%08lx-0x%08lx " , st - > start_address , addr - 1 ) ;
seq_printf ( st - > seq , " 0x%08lx " , st - > start_pa ) ;
# endif
2017-03-31 04:37:49 +03:00
2016-05-27 08:48:59 +03:00
delta = ( addr - st - > start_address ) > > 10 ;
/* Work out what appropriate unit to use */
while ( ! ( delta & 1023 ) & & unit [ 1 ] ) {
delta > > = 10 ;
unit + + ;
}
seq_printf ( st - > seq , " %9lu%c " , delta , * unit ) ;
}
/*
 * Account for one page-table entry, and write out the range accumulated so
 * far if this entry does not continue it.
 *
 * @st:    dump state
 * @addr:  virtual address the entry maps
 * @level: page-table level of the entry, used as index into pg_level[]
 *         (the walkers pass 1 = pgd, 2 = pud, 3 = pmd, 4 = pte;
 *         ptdump_show() passes 0 for its final flushing call)
 * @val:   raw value of the entry
 *
 * A range line is terminated with a single newline character.
 */
static void note_page(struct pg_state *st, unsigned long addr,
		      unsigned int level, u64 val)
{
	u64 flag = val & pg_level[level].mask;
	u64 pa = val & PTE_RPN_MASK;

	/* At first no level is set */
	if (!st->level) {
		st->level = level;
		st->current_flags = flag;
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		seq_printf(st->seq, " ---[ %s ]--- \n ", st->marker->name);
	/*
	 * Dump the section of virtual memory when:
	 *   - the PTE flags from one entry to the next differs.
	 *   - we change levels in the tree.
	 *   - the address is in a different section of memory and is thus
	 *     used for a different purpose, regardless of the flags.
	 *   - the pa of this page is not adjacent to the last inspected page
	 */
	} else if (flag != st->current_flags || level != st->level ||
		   addr >= st->marker[1].start_address ||
		   pa != st->last_pa + PAGE_SIZE) {

		/* Ranges whose flag bits are all zero are not printed */
		if (st->current_flags) {
			dump_addr(st, addr);

			/* Dump all the flags */
			if (pg_level[st->level].flag)
				dump_flag_info(st, pg_level[st->level].flag,
					       st->current_flags,
					       pg_level[st->level].num);

			/*
			 * seq_putc() emits exactly one character, so this has
			 * to be the plain newline character constant.
			 */
			seq_putc(st->seq, '\n');
		}

		/*
		 * Address indicates we have passed the end of the
		 * current section of virtual memory
		 */
		while (addr >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(st->seq, " ---[ %s ]--- \n ", st->marker->name);
		}

		/* Start a new range at this entry */
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		st->current_flags = flag;
		st->level = level;
	} else {
		/* Entry continues the current range: just remember its pa */
		st->last_pa = pa;
	}
}
/*
 * Feed every PTE of the PTE table referenced by @pmd to note_page().
 *
 * @st:    dump state
 * @pmd:   PMD entry whose PTE table is walked
 * @start: virtual address mapped by the first PTE of that table
 */
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
	pte_t *table = pte_offset_kernel(pmd, 0);
	unsigned int idx;

	/* consecutive PTEs are PAGE_SIZE apart; PTEs are level 4 */
	for (idx = 0; idx < PTRS_PER_PTE; idx++)
		note_page(st, start + idx * PAGE_SIZE, 4, pte_val(table[idx]));
}
/*
 * Walk every PMD entry of the PMD table referenced by @pud.
 *
 * @st:    dump state
 * @pud:   PUD entry whose PMD table is walked
 * @start: virtual address covered by the first PMD entry of that table
 *
 * Entries that are empty or huge are reported to note_page() directly as
 * level 3; all others are descended into with walk_pte().
 */
static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
{
	pmd_t *table = pmd_offset(pud, 0);
	unsigned int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++) {
		pmd_t *entry = &table[idx];
		unsigned long va = start + idx * PMD_SIZE;

		if (pmd_none(*entry) || pmd_huge(*entry))
			note_page(st, va, 3, pmd_val(*entry));
		else
			walk_pte(st, entry, va);
	}
}
/*
 * Walk every PUD entry of the PUD table referenced by @pgd.
 *
 * @st:    dump state
 * @pgd:   PGD entry whose PUD table is walked
 * @start: virtual address covered by the first PUD entry of that table
 *
 * Entries that are empty or huge are reported to note_page() directly as
 * level 2; all others are descended into with walk_pmd().
 */
static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
{
	pud_t *table = pud_offset(pgd, 0);
	unsigned int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
		pud_t *entry = &table[idx];
		unsigned long va = start + idx * PUD_SIZE;

		if (pud_none(*entry) || pud_huge(*entry))
			note_page(st, va, 2, pud_val(*entry));
		else
			walk_pmd(st, entry, va);
	}
}
/*
 * Walk the whole kernel page-table tree, top level first.
 *
 * @st: dump state
 *
 * The walk starts at the PGD entry returned by pgd_offset_k(0UL) and
 * numbers addresses upwards from KERN_VIRT_START in PGDIR_SIZE steps.
 * Entries that are empty or huge are reported to note_page() directly as
 * level 1; all others are descended into with walk_pud().
 */
static void walk_pagetables(struct pg_state *st)
{
	pgd_t *table = pgd_offset_k(0UL);
	unsigned int idx;

	for (idx = 0; idx < PTRS_PER_PGD; idx++) {
		pgd_t *entry = &table[idx];
		unsigned long va = KERN_VIRT_START + idx * PGDIR_SIZE;

		if (pgd_none(*entry) || pgd_huge(*entry))
			note_page(st, va, 1, pgd_val(*entry));
		else
			walk_pud(st, entry, va);
	}
}
static void populate_markers ( void )
{
2017-04-18 09:20:13 +03:00
int i = 0 ;
address_markers [ i + + ] . start_address = PAGE_OFFSET ;
address_markers [ i + + ] . start_address = VMALLOC_START ;
address_markers [ i + + ] . start_address = VMALLOC_END ;
# ifdef CONFIG_PPC64
address_markers [ i + + ] . start_address = ISA_IO_BASE ;
address_markers [ i + + ] . start_address = ISA_IO_END ;
address_markers [ i + + ] . start_address = PHB_IO_BASE ;
address_markers [ i + + ] . start_address = PHB_IO_END ;
address_markers [ i + + ] . start_address = IOREMAP_BASE ;
address_markers [ i + + ] . start_address = IOREMAP_END ;
2017-10-19 07:08:43 +03:00
# ifdef CONFIG_PPC_BOOK3S_64
2017-04-18 09:20:13 +03:00
address_markers [ i + + ] . start_address = H_VMEMMAP_BASE ;
2016-05-27 08:48:59 +03:00
# else
2017-04-18 09:20:13 +03:00
address_markers [ i + + ] . start_address = VMEMMAP_BASE ;
# endif
# else /* !CONFIG_PPC64 */
address_markers [ i + + ] . start_address = ioremap_bot ;
address_markers [ i + + ] . start_address = IOREMAP_TOP ;
# ifdef CONFIG_NOT_COHERENT_CACHE
address_markers [ i + + ] . start_address = IOREMAP_TOP ;
address_markers [ i + + ] . start_address = IOREMAP_TOP +
CONFIG_CONSISTENT_SIZE ;
# endif
# ifdef CONFIG_HIGHMEM
address_markers [ i + + ] . start_address = PKMAP_BASE ;
address_markers [ i + + ] . start_address = PKMAP_ADDR ( LAST_PKMAP ) ;
2016-05-27 08:48:59 +03:00
# endif
2017-04-18 09:20:13 +03:00
address_markers [ i + + ] . start_address = FIXADDR_START ;
address_markers [ i + + ] . start_address = FIXADDR_TOP ;
# endif /* CONFIG_PPC64 */
2016-05-27 08:48:59 +03:00
}
/*
 * seq_file show callback: produce the complete page-table dump.
 *
 * @m: seq_file to write to
 * @v: unused
 *
 * Returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	struct pg_state st = {
		.seq		= m,
		.marker		= address_markers,
		.start_address	= KERN_VIRT_START,
	};

	/* Traverse kernel page tables */
	walk_pagetables(&st);

	/*
	 * One last call with level 0, so that the range still being
	 * accumulated is written out.
	 */
	note_page(&st, 0, 0, 0);

	return 0;
}
/*
 * open() handler of the debugfs file: set up a single-shot seq_file whose
 * content is generated by ptdump_show(). No private data is attached.
 * Returns whatever single_open() returns.
 */
static int ptdump_open(struct inode *inode, struct file *file)
{
	int ret = single_open(file, ptdump_show, NULL);

	return ret;
}
/*
 * File operations of the debugfs file: opening goes through ptdump_open(),
 * everything else is delegated to the seq_file helpers.
 */
static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static void build_pgtable_complete_mask ( void )
{
unsigned int i , j ;
for ( i = 0 ; i < ARRAY_SIZE ( pg_level ) ; i + + )
if ( pg_level [ i ] . flag )
for ( j = 0 ; j < pg_level [ i ] . num ; j + + )
pg_level [ i ] . mask | = pg_level [ i ] . flag [ j ] . mask ;
}
static int ptdump_init ( void )
{
struct dentry * debugfs_file ;
populate_markers ( ) ;
build_pgtable_complete_mask ( ) ;
2017-04-18 09:20:15 +03:00
debugfs_file = debugfs_create_file ( " kernel_page_tables " , 0400 , NULL ,
2016-05-27 08:48:59 +03:00
NULL , & ptdump_fops ) ;
return debugfs_file ? 0 : - ENOMEM ;
}
device_initcall ( ptdump_init ) ;