2019-06-04 11:11:33 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2016-06-07 21:05:33 +03:00
/*
 * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
 * which are designed to protect kernel memory from needless exposure
 * and overwrite under many unintended conditions. This code is based
 * on PAX_USERCOPY, which is:
 *
 * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
 * Security Inc.
 */
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
# include <linux/mm.h>
2019-09-17 21:00:25 +03:00
# include <linux/highmem.h>
2016-06-07 21:05:33 +03:00
# include <linux/slab.h>
2017-02-02 19:54:15 +03:00
# include <linux/sched.h>
2017-02-08 20:51:36 +03:00
# include <linux/sched/task.h>
# include <linux/sched/task_stack.h>
2017-02-16 21:29:15 +03:00
# include <linux/thread_info.h>
2018-07-03 22:43:08 +03:00
# include <linux/atomic.h>
# include <linux/jump_label.h>
2016-06-07 21:05:33 +03:00
# include <asm/sections.h>
2021-10-04 16:45:56 +03:00
# include "slab.h"
2016-06-07 21:05:33 +03:00
/*
 * Classify the byte range [obj, obj + len) against the stack of the
 * current task, i.e. [task_stack_page(current), + THREAD_SIZE).
 *
 * Returns:
 *	NOT_STACK: the range does not touch the stack at all
 *	GOOD_FRAME: fully within a valid stack frame
 *	GOOD_STACK: fully on the stack (frame checking gave no verdict)
 *	BAD_STACK: error condition (range straddles a stack boundary, or
 *		   the frame check rejected it)
 */
static noinline int check_stack_object(const void *obj, unsigned long len)
{
	const void * const stack_lo = task_stack_page(current);
	const void * const stack_hi = stack_lo + THREAD_SIZE;
	const void * const obj_end = obj + len;
	int frame_status;

	/* Entirely below or entirely above the stack: not a stack object. */
	if (obj_end <= stack_lo || obj >= stack_hi)
		return NOT_STACK;

	/*
	 * At least one end lies inside the stack at this point, so any
	 * end found outside means the object only partially overlaps it.
	 */
	if (obj < stack_lo || obj_end > stack_hi)
		return BAD_STACK;

	/*
	 * Let the architecture judge the frame; a zero result means it
	 * had no opinion, in which case being on the stack is enough.
	 */
	frame_status = arch_within_stack_frames(stack_lo, stack_hi, obj, len);
	return frame_status ? frame_status : GOOD_STACK;
}
2018-01-11 01:22:38 +03:00
/*
 * usercopy_warn() and usercopy_abort() are reached when
 * CONFIG_HARDENED_USERCOPY has found an unexpected state during a
 * copy_from_user() or copy_to_user() call. Several checks are performed
 * on the buffer by __check_object_size(). Normal stack buffer usage
 * should never trip the checks, and kernel text addressing will always
 * trip the check. For cache objects, only the whitelisted range of bytes
 * for a given cache may be accessed (via the cache's usersize and
 * useroffset fields). To adjust a cache whitelist, create the cache with
 * the usercopy-aware kmem_cache_create_usercopy() function (and
 * carefully audit the whitelist range).
 *
 * usercopy_warn() emits a one-time warning and returns to the caller.
 *
 * @name:    description of the region involved (a placeholder literal is
 *           printed when NULL)
 * @detail:  optional extra identifier, printed between quote marks
 * @to_user: true for a copy towards user space ("exposure"/"from"),
 *           false for a copy into the kernel ("overwrite"/"to")
 * @offset:  offset reported in the message
 * @len:     size reported in the message
 */
void usercopy_warn(const char *name, const char *detail, bool to_user,
		   unsigned long offset, unsigned long len)
{
	/*
	 * NOTE(review): every literal below is reproduced exactly as found,
	 * including the spaces just inside the quotes -- confirm that this
	 * padding is intentional.
	 */
	const char *action = to_user ? " exposure " : " overwrite ";
	const char *direction = to_user ? " from " : " to ";
	const char *quote = detail ? " ' " : " ";

	WARN_ONCE(1, " Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)! \n ",
		  action, direction,
		  name ? name : " unknown?! ",
		  quote, detail ? detail : " ", quote,
		  offset, len);
}
2018-01-11 01:22:38 +03:00
/*
 * Report a rejected usercopy at emergency log level and never return.
 *
 * @name:    description of the region involved (a placeholder literal is
 *           printed when NULL)
 * @detail:  optional extra identifier, printed between quote marks
 * @to_user: true for a copy towards user space ("exposure"/"from"),
 *           false for a copy into the kernel ("overwrite"/"to")
 * @offset:  offset reported in the message
 * @len:     size reported in the message
 */
void __noreturn usercopy_abort(const char *name, const char *detail,
			       bool to_user, unsigned long offset,
			       unsigned long len)
{
	/*
	 * NOTE(review): every literal below is reproduced exactly as found,
	 * including the spaces just inside the quotes -- confirm that this
	 * padding is intentional.
	 */
	const char *action = to_user ? " exposure " : " overwrite ";
	const char *direction = to_user ? " from " : " to ";
	const char *quote = detail ? " ' " : " ";

	pr_emerg(" Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)! \n ",
		 action, direction,
		 name ? name : " unknown?! ",
		 quote, detail ? detail : " ", quote,
		 offset, len);

	/*
	 * do_group_exit() would arguably have greater effect, but BUG()
	 * hooks all the lock-breaking and per-arch Oops code, so it is
	 * what gets used here.
	 */
	BUG();
}
/*
 * Returns true if any portion of [ptr, ptr + n) overlaps with [low, high).
 *
 * Two half-open ranges intersect exactly when each one starts before the
 * other one ends. ptr + n is computed in unsigned long arithmetic with no
 * wrap check of its own.
 */
static bool overlaps(const unsigned long ptr, unsigned long n,
		     unsigned long low, unsigned long high)
{
	const unsigned long range_end = ptr + n;

	return ptr < high && range_end > low;
}
/* Is this address range in the kernel text area? */
2018-01-11 01:48:22 +03:00
static inline void check_kernel_text_object ( const unsigned long ptr ,
unsigned long n , bool to_user )
2016-06-07 21:05:33 +03:00
{
unsigned long textlow = ( unsigned long ) _stext ;
unsigned long texthigh = ( unsigned long ) _etext ;
unsigned long textlow_linear , texthigh_linear ;
if ( overlaps ( ptr , n , textlow , texthigh ) )
2018-01-11 01:48:22 +03:00
usercopy_abort ( " kernel text " , NULL , to_user , ptr - textlow , n ) ;
2016-06-07 21:05:33 +03:00
/*
* Some architectures have virtual memory mappings with a secondary
* mapping of the kernel text , i . e . there is more than one virtual
* kernel address that points to the kernel image . It is usually
* when there is a separate linear physical memory mapping , in that
* __pa ( ) is not just the reverse of __va ( ) . This can be detected
* and checked :
*/
2017-01-11 00:35:45 +03:00
textlow_linear = ( unsigned long ) lm_alias ( textlow ) ;
2016-06-07 21:05:33 +03:00
/* No different mapping: we're done. */
if ( textlow_linear = = textlow )
2018-01-11 01:48:22 +03:00
return ;
2016-06-07 21:05:33 +03:00
/* Check the secondary mapping... */
2017-01-11 00:35:45 +03:00
texthigh_linear = ( unsigned long ) lm_alias ( texthigh ) ;
2016-06-07 21:05:33 +03:00
if ( overlaps ( ptr , n , textlow_linear , texthigh_linear ) )
2018-01-11 01:48:22 +03:00
usercopy_abort ( " linear kernel text " , NULL , to_user ,
ptr - textlow_linear , n ) ;
2016-06-07 21:05:33 +03:00
}
2018-01-11 01:48:22 +03:00
/*
 * Reject addresses that cannot describe a real object: a range whose
 * last byte wraps around the end of the address space, or a pointer for
 * which ZERO_OR_NULL_PTR() is true. Either case ends in usercopy_abort().
 *
 * The wrap test uses n - 1, so it is only meaningful for n > 0;
 * __check_object_size() returns early on a zero size before calling here.
 */
static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
				       bool to_user)
{
	const unsigned long last_byte = ptr + (n - 1);

	/* Reject if object wraps past end of memory. */
	if (last_byte < ptr)
		usercopy_abort(" wrapped address ", NULL, to_user, 0, ptr + n);

	/* Reject if NULL or ZERO-allocation. */
	if (ZERO_OR_NULL_PTR(ptr))
		usercopy_abort(" null address ", NULL, to_user, ptr, n);
}
2016-09-07 19:54:34 +03:00
/*
 * Checks for allocations that are marked in some way as spanning multiple
 * pages. The body is compiled only when CONFIG_HARDENED_USERCOPY_PAGESPAN
 * is defined; otherwise this function does nothing.
 *
 * @ptr:     first byte of the object
 * @n:       object size in bytes
 * @page:    page the caller resolved for @ptr (compared against the head
 *           page of the object's last byte)
 * @to_user: direction of the copy, forwarded to usercopy_abort()
 *
 * Returns normally when the object is acceptable and calls
 * usercopy_abort() otherwise.
 */
static inline void check_page_span(const void *ptr, unsigned long n,
				   struct page *page, bool to_user)
{
#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
	const void *last = ptr + n - 1;
	const void *cursor;
	bool is_reserved, is_cma;

	/*
	 * Sometimes the kernel data regions are not marked Reserved (see
	 * the check further down), and sometimes [_sdata, _edata) does not
	 * cover rodata and/or bss, so each range is tested explicitly.
	 */

	/* rodata may be read out to user space but never written into. */
	if (ptr >= (const void *)__start_rodata &&
	    last <= (const void *)__end_rodata) {
		if (!to_user)
			usercopy_abort(" rodata ", NULL, to_user, 0, n);
		return;
	}

	/* Allow the kernel data and bss regions. */
	if ((ptr >= (const void *)_sdata && last <= (const void *)_edata) ||
	    (ptr >= (const void *)__bss_start &&
	     last <= (const void *)__bss_stop))
		return;

	/* Is the object wholly within one base page? */
	if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
		   ((unsigned long)last & (unsigned long)PAGE_MASK)))
		return;

	/* Allow if fully inside the same compound (__GFP_COMP) page. */
	if (likely(virt_to_head_page(last) == page))
		return;

	/*
	 * A multi-page object is tolerated from here on only if its first
	 * page is Reserved (i.e. special or device memory) or CMA and every
	 * later page shares that property. Anything else spans several
	 * independently allocated pages and is rejected.
	 */
	is_reserved = PageReserved(page);
	is_cma = is_migrate_cma_page(page);
	if (!(is_reserved || is_cma))
		usercopy_abort(" spans multiple pages ", NULL, to_user, 0, n);

	for (cursor = ptr + PAGE_SIZE; cursor <= last; cursor += PAGE_SIZE) {
		struct page *head = virt_to_head_page(cursor);

		if (is_reserved && !PageReserved(head))
			usercopy_abort(" spans Reserved and non-Reserved pages ",
				       NULL, to_user, 0, n);
		if (is_cma && !is_migrate_cma_page(head))
			usercopy_abort(" spans CMA and non-CMA pages ", NULL,
				       to_user, 0, n);
	}
#endif
}
2018-01-11 01:48:22 +03:00
/*
 * Dispatch a non-stack object to the matching allocator check.
 *
 * Addresses for which virt_addr_valid() is false are left unchecked.
 * Otherwise the backing folio is looked up: slab folios go to
 * __check_heap_object(), everything else to check_page_span().
 */
static inline void check_heap_object(const void *ptr, unsigned long n,
				     bool to_user)
{
	struct page *pg;
	struct folio *folio;

	if (!virt_addr_valid(ptr))
		return;

	/*
	 * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
	 * highmem page or fall back to virt_to_page(). Together with
	 * page_folio() this is effectively a highmem-aware virt_to_slab().
	 */
	pg = kmap_to_page((void *)ptr);
	folio = page_folio(pg);

	if (folio_test_slab(folio)) {
		/* Check slab allocator for flags and size. */
		__check_heap_object(ptr, n, folio_slab(folio), to_user);
		return;
	}

	/* Verify object does not incorrectly span multiple pages. */
	check_page_span(ptr, n, folio_page(folio, 0), to_user);
}
2018-07-03 22:43:08 +03:00
/*
 * Static branch tested first thing in __check_object_size(): while it is
 * enabled, every usercopy check is skipped. set_hardened_usercopy()
 * enables it when enable_checks is false.
 */
static DEFINE_STATIC_KEY_FALSE_RO ( bypass_usercopy_checks ) ;
2016-06-07 21:05:33 +03:00
/*
* Validates that the given object is :
* - not bogus address
mm/usercopy.c: no check page span for stack objects
It is easy to trigger this with CONFIG_HARDENED_USERCOPY_PAGESPAN=y,
usercopy: Kernel memory overwrite attempt detected to spans multiple pages (offset 0, size 23)!
kernel BUG at mm/usercopy.c:102!
For example,
print_worker_info
char name[WQ_NAME_LEN] = { };
char desc[WORKER_DESC_LEN] = { };
probe_kernel_read(name, wq->name, sizeof(name) - 1);
probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
__copy_from_user_inatomic
check_object_size
check_heap_object
check_page_span
This is because on-stack variables could cross PAGE_SIZE boundary, and
failed this check,
if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
((unsigned long)end & (unsigned long)PAGE_MASK)))
ptr = FFFF889007D7EFF8
end = FFFF889007D7F00E
Hence, fix it by checking if it is a stack object first.
[keescook@chromium.org: improve comments after reorder]
Link: http://lkml.kernel.org/r/20190103165151.GA32845@beast
Link: http://lkml.kernel.org/r/20181231030254.99441-1-cai@lca.pw
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-09 02:23:04 +03:00
* - fully contained by stack ( or stack frame , when available )
* - fully within SLAB object ( or object whitelist area , when available )
2016-06-07 21:05:33 +03:00
* - not in kernel text
*/
void __check_object_size ( const void * ptr , unsigned long n , bool to_user )
{
2018-07-03 22:43:08 +03:00
if ( static_branch_unlikely ( & bypass_usercopy_checks ) )
return ;
2016-06-07 21:05:33 +03:00
/* Skip all tests if size is zero. */
if ( ! n )
return ;
/* Check for invalid addresses. */
2018-01-11 01:48:22 +03:00
check_bogus_address ( ( const unsigned long ) ptr , n , to_user ) ;
2016-06-07 21:05:33 +03:00
/* Check for bad stack object. */
switch ( check_stack_object ( ptr , n ) ) {
case NOT_STACK :
/* Object is not touching the current process stack. */
break ;
case GOOD_FRAME :
case GOOD_STACK :
/*
* Object is either in the correct frame ( when it
* is possible to check ) or just generally on the
* process stack ( when frame checking not available ) .
*/
return ;
default :
2018-01-11 01:48:22 +03:00
usercopy_abort ( " process stack " , NULL , to_user , 0 , n ) ;
2016-06-07 21:05:33 +03:00
}
mm/usercopy.c: no check page span for stack objects
It is easy to trigger this with CONFIG_HARDENED_USERCOPY_PAGESPAN=y,
usercopy: Kernel memory overwrite attempt detected to spans multiple pages (offset 0, size 23)!
kernel BUG at mm/usercopy.c:102!
For example,
print_worker_info
char name[WQ_NAME_LEN] = { };
char desc[WORKER_DESC_LEN] = { };
probe_kernel_read(name, wq->name, sizeof(name) - 1);
probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
__copy_from_user_inatomic
check_object_size
check_heap_object
check_page_span
This is because on-stack variables could cross PAGE_SIZE boundary, and
failed this check,
if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
((unsigned long)end & (unsigned long)PAGE_MASK)))
ptr = FFFF889007D7EFF8
end = FFFF889007D7F00E
Hence, fix it by checking if it is a stack object first.
[keescook@chromium.org: improve comments after reorder]
Link: http://lkml.kernel.org/r/20190103165151.GA32845@beast
Link: http://lkml.kernel.org/r/20181231030254.99441-1-cai@lca.pw
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-09 02:23:04 +03:00
/* Check for bad heap object. */
check_heap_object ( ptr , n , to_user ) ;
2016-06-07 21:05:33 +03:00
/* Check for object in kernel to avoid text exposure. */
2018-01-11 01:48:22 +03:00
check_kernel_text_object ( ( const unsigned long ) ptr , n , to_user ) ;
2016-06-07 21:05:33 +03:00
}
EXPORT_SYMBOL ( __check_object_size ) ;
2018-07-03 22:43:08 +03:00
/*
 * Boot-time switch for the usercopy checks; true unless changed by
 * parse_hardened_usercopy(). Read by set_hardened_usercopy().
 */
static bool enable_checks __initdata = true ;
/*
 * __setup() handler for the hardened_usercopy boot option.
 *
 * @str: the text following the option name on the kernel command line
 *
 * Parses @str as a boolean into enable_checks. A value that cannot be
 * parsed leaves enable_checks untouched and is reported with a warning.
 *
 * Always returns 1: a __setup() handler returns 1 to say the option was
 * consumed. Passing the strtobool() result through would return 0 on a
 * successful parse, which reports the option as unhandled.
 */
static int __init parse_hardened_usercopy(char *str)
{
	if (strtobool(str, &enable_checks))
		pr_warn("Invalid option string for hardened_usercopy: '%s'\n",
			str);
	return 1;
}
/*
 * Register parse_hardened_usercopy() as the handler for the boot option
 * named by the string below.
 * NOTE(review): as written, the option string carries a leading and a
 * trailing space inside the quotes -- confirm it matches the intended
 * "hardened_usercopy=" command-line token.
 */
__setup ( " hardened_usercopy= " , parse_hardened_usercopy ) ;
/*
 * Late initcall that applies the boot-time choice: when enable_checks is
 * false, flip the bypass_usercopy_checks static branch so that
 * __check_object_size() returns immediately. Returns 1 in either case.
 */
static int __init set_hardened_usercopy(void)
{
	/* Checks stay active: leave the static branch as it is. */
	if (enable_checks)
		return 1;

	static_branch_enable(&bypass_usercopy_checks);
	return 1;
}
late_initcall(set_hardened_usercopy);