2007-07-16 10:40:59 +04:00
/*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation , version 2 of the
* License .
*/
2011-05-23 22:51:41 +04:00
# include <linux/export.h>
2007-07-16 10:40:59 +04:00
# include <linux/nsproxy.h>
2008-04-29 11:59:25 +04:00
# include <linux/slab.h>
2007-07-16 10:40:59 +04:00
# include <linux/user_namespace.h>
2013-04-12 04:50:06 +04:00
# include <linux/proc_ns.h>
2010-06-13 07:28:03 +04:00
# include <linux/highuid.h>
2008-10-16 01:38:45 +04:00
# include <linux/cred.h>
2011-11-17 13:59:07 +04:00
# include <linux/securebits.h>
2011-11-17 12:11:58 +04:00
# include <linux/keyctl.h>
# include <linux/key-type.h>
# include <keys/user-type.h>
# include <linux/seq_file.h>
# include <linux/fs.h>
# include <linux/uaccess.h>
# include <linux/ctype.h>
2012-08-30 12:24:05 +04:00
# include <linux/projid.h>
2013-03-13 22:51:49 +04:00
# include <linux/fs_struct.h>
2007-07-16 10:40:59 +04:00
2011-01-13 04:00:46 +03:00
static struct kmem_cache * user_ns_cachep __read_mostly ;
2014-12-09 23:03:14 +03:00
static DEFINE_MUTEX ( userns_state_mutex ) ;
2011-01-13 04:00:46 +03:00
2013-04-15 00:47:02 +04:00
static bool new_idmap_permitted ( const struct file * file ,
struct user_namespace * ns , int cap_setid ,
2011-11-17 12:11:58 +04:00
struct uid_gid_map * map ) ;
2012-07-26 17:24:06 +04:00
static void set_cred_user_ns ( struct cred * cred , struct user_namespace * user_ns )
{
/* Start with the same capabilities as init but useless for doing
* anything as the capabilities are bound to the new user namespace .
*/
cred - > securebits = SECUREBITS_DEFAULT ;
cred - > cap_inheritable = CAP_EMPTY_SET ;
cred - > cap_permitted = CAP_FULL_SET ;
cred - > cap_effective = CAP_FULL_SET ;
cred - > cap_bset = CAP_FULL_SET ;
# ifdef CONFIG_KEYS
key_put ( cred - > request_key_auth ) ;
cred - > request_key_auth = NULL ;
# endif
/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
cred - > user_ns = user_ns ;
}
2007-07-16 10:41:01 +04:00
/*
2008-10-16 01:38:45 +04:00
* Create a new user namespace , deriving the creator from the user in the
* passed credentials , and replacing that user with the new root user for the
* new namespace .
*
* This is called by copy_creds ( ) , which will finish setting the target task ' s
* credentials .
2007-07-16 10:41:01 +04:00
*/
2008-10-16 01:38:45 +04:00
int create_user_ns ( struct cred * new )
2007-07-16 10:41:01 +04:00
{
2011-11-17 09:52:53 +04:00
struct user_namespace * ns , * parent_ns = new - > user_ns ;
2012-02-08 19:00:08 +04:00
kuid_t owner = new - > euid ;
kgid_t group = new - > egid ;
2011-06-15 21:21:48 +04:00
int ret ;
2011-11-17 13:32:59 +04:00
2013-08-08 20:55:32 +04:00
if ( parent_ns - > level > 32 )
return - EUSERS ;
2013-03-15 12:45:51 +04:00
/*
* Verify that we can not violate the policy of which files
* may be accessed that is specified by the root directory ,
* by verifing that the root directory is at the root of the
* mount namespace which allows all files to be accessed .
*/
if ( current_chrooted ( ) )
return - EPERM ;
2011-11-17 13:32:59 +04:00
/* The creator needs a mapping in the parent user namespace
* or else we won ' t be able to reasonably tell userspace who
* created a user_namespace .
*/
if ( ! kuid_has_mapping ( parent_ns , owner ) | |
! kgid_has_mapping ( parent_ns , group ) )
return - EPERM ;
2007-07-16 10:41:01 +04:00
2011-11-17 12:11:58 +04:00
ns = kmem_cache_zalloc ( user_ns_cachep , GFP_KERNEL ) ;
2007-07-16 10:41:01 +04:00
if ( ! ns )
2008-10-16 01:38:45 +04:00
return - ENOMEM ;
2007-07-16 10:41:01 +04:00
2014-11-01 07:45:45 +03:00
ret = ns_alloc_inum ( & ns - > ns ) ;
2011-06-15 21:21:48 +04:00
if ( ret ) {
kmem_cache_free ( user_ns_cachep , ns ) ;
return ret ;
}
2014-11-01 09:32:53 +03:00
ns - > ns . ops = & userns_operations ;
2011-06-15 21:21:48 +04:00
2012-12-29 06:58:39 +04:00
atomic_set ( & ns - > count , 1 ) ;
2012-07-26 17:24:06 +04:00
/* Leave the new->user_ns reference with the new user namespace. */
2011-11-17 09:59:43 +04:00
ns - > parent = parent_ns ;
2013-08-08 20:55:32 +04:00
ns - > level = parent_ns - > level + 1 ;
2011-11-17 13:32:59 +04:00
ns - > owner = owner ;
ns - > group = group ;
2011-11-17 12:11:58 +04:00
2014-12-02 21:27:26 +03:00
/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
mutex_lock ( & userns_state_mutex ) ;
ns - > flags = parent_ns - > flags ;
mutex_unlock ( & userns_state_mutex ) ;
2012-07-26 17:24:06 +04:00
set_cred_user_ns ( new , ns ) ;
2011-11-17 09:52:53 +04:00
2013-09-24 13:35:19 +04:00
# ifdef CONFIG_PERSISTENT_KEYRINGS
init_rwsem ( & ns - > persistent_keyring_register_sem ) ;
# endif
2008-10-16 01:38:45 +04:00
return 0 ;
2007-07-16 10:40:59 +04:00
}
2012-07-26 16:15:35 +04:00
int unshare_userns ( unsigned long unshare_flags , struct cred * * new_cred )
{
struct cred * cred ;
2013-08-06 21:38:55 +04:00
int err = - ENOMEM ;
2012-07-26 16:15:35 +04:00
if ( ! ( unshare_flags & CLONE_NEWUSER ) )
return 0 ;
cred = prepare_creds ( ) ;
2013-08-06 21:38:55 +04:00
if ( cred ) {
err = create_user_ns ( cred ) ;
if ( err )
put_cred ( cred ) ;
else
* new_cred = cred ;
}
2012-07-26 16:15:35 +04:00
2013-08-06 21:38:55 +04:00
return err ;
2012-07-26 16:15:35 +04:00
}
2012-12-29 06:58:39 +04:00
void free_user_ns ( struct user_namespace * ns )
2007-07-16 10:40:59 +04:00
{
2012-12-29 06:58:39 +04:00
struct user_namespace * parent ;
2011-11-17 13:32:59 +04:00
2012-12-29 06:58:39 +04:00
do {
parent = ns - > parent ;
2013-09-24 13:35:19 +04:00
# ifdef CONFIG_PERSISTENT_KEYRINGS
key_put ( ns - > persistent_keyring_register ) ;
# endif
2014-11-01 07:45:45 +03:00
ns_free_inum ( & ns - > ns ) ;
2012-12-29 06:58:39 +04:00
kmem_cache_free ( user_ns_cachep , ns ) ;
ns = parent ;
} while ( atomic_dec_and_test ( & parent - > count ) ) ;
2007-07-16 10:40:59 +04:00
}
2008-04-29 11:59:52 +04:00
EXPORT_SYMBOL ( free_user_ns ) ;
2010-06-13 07:28:03 +04:00
2011-11-17 12:11:58 +04:00
/*
 * map_id_range_down - translate the start of an id range through a map.
 * @map:   map to search
 * @id:    first id of the range (compared against each extent's "first")
 * @count: number of ids in the range
 *
 * The whole range [@id, @id + @count - 1] has to fall inside one extent.
 *
 * Returns the id that @id maps to on the lower side, or (u32) -1 when no
 * single extent covers the range.
 */
static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
	u32 range_end = id + count - 1;
	unsigned nr, i;

	/* Read the extent count before the extents; pairs with map_write() */
	nr = map->nr_extents;
	smp_rmb();

	for (i = 0; i < nr; i++) {
		u32 lo = map->extent[i].first;
		u32 hi = lo + map->extent[i].count - 1;

		if (id < lo || id > hi)
			continue;
		if (range_end < lo || range_end > hi)
			continue;

		/* Found the extent that covers the whole range */
		return (id - lo) + map->extent[i].lower_first;
	}

	return (u32) -1;
}
/*
 * map_id_down - translate a single id through a map.
 * @map: map to search
 * @id:  id to translate (compared against each extent's "first")
 *
 * A single id is a range of length one, so this defers to
 * map_id_range_down() instead of repeating its extent walk: with a count
 * of 1 the end of the range equals @id and the lookup is the same.
 *
 * Returns the id that @id maps to on the lower side, or (u32) -1 when no
 * extent contains @id.
 */
static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	return map_id_range_down(map, id, 1);
}
/*
 * map_id_up - translate an id through a map in the reverse direction.
 * @map: map to search
 * @id:  id to translate (compared against each extent's "lower_first")
 *
 * Returns the id on the upper side of the extent that contains @id, or
 * (u32) -1 when no extent contains it.
 */
static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
	unsigned nr, i;

	/* Read the extent count before the extents; pairs with map_write() */
	nr = map->nr_extents;
	smp_rmb();

	for (i = 0; i < nr; i++) {
		u32 lo = map->extent[i].lower_first;
		u32 hi = lo + map->extent[i].count - 1;

		if (id < lo || id > hi)
			continue;

		return (id - lo) + map->extent[i].first;
	}

	return (u32) -1;
}
/**
 *	make_kuid - Map a user-namespace uid pair into a kuid.
 *	@ns:  User namespace that the uid is in
 *	@uid: User identifier
 *
 *	Looks @uid up in the uid map of @ns and returns the resulting
 *	kernel internal kuid.
 *
 *	When there is no mapping defined for the user-namespace uid
 *	pair INVALID_UID is returned.  Callers are expected to test
 *	for and handle INVALID_UID being returned.  INVALID_UID
 *	may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Translate the uid into the global kernel id space */
	u32 kernel_id = map_id_down(&ns->uid_map, uid);

	return KUIDT_INIT(kernel_id);
}
EXPORT_SYMBOL(make_kuid);
/**
 *	from_kuid - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Looks @kuid up in the uid map of @targ and returns the uid it
 *	corresponds to inside that namespace.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Start from the raw global kernel uid */
	u32 kernel_id = __kuid_val(kuid);

	return map_id_up(&targ->uid_map, kernel_id);
}
EXPORT_SYMBOL(from_kuid);
/**
 *	from_kuid_munged - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kuid, from_kuid_munged never fails and always
 *	returns a valid uid.  This makes from_kuid_munged appropriate
 *	for use in syscalls like stat and getuid where failing the
 *	system call and failing to provide a valid uid are not
 *	options.
 *
 *	If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
	uid_t uid = from_kuid(targ, kuid);

	/* Substitute the overflow id for "no mapping" */
	return (uid == (uid_t) -1) ? overflowuid : uid;
}
EXPORT_SYMBOL(from_kuid_munged);
/**
 *	make_kgid - Map a user-namespace gid pair into a kgid.
 *	@ns:  User namespace that the gid is in
 *	@gid: Group identifier
 *
 *	Looks @gid up in the gid map of @ns and returns the resulting
 *	kernel internal kgid.
 *
 *	When there is no mapping defined for the user-namespace gid
 *	pair INVALID_GID is returned.  Callers are expected to test
 *	for and handle INVALID_GID being returned.  INVALID_GID may be
 *	tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Translate the gid into the global kernel id space */
	u32 kernel_id = map_id_down(&ns->gid_map, gid);

	return KGIDT_INIT(kernel_id);
}
EXPORT_SYMBOL(make_kgid);
/**
 *	from_kgid - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Looks @kgid up in the gid map of @targ and returns the gid it
 *	corresponds to inside that namespace.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Start from the raw global kernel gid */
	u32 kernel_id = __kgid_val(kgid);

	return map_id_up(&targ->gid_map, kernel_id);
}
EXPORT_SYMBOL(from_kgid);
/**
 *	from_kgid_munged - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kgid, from_kgid_munged never fails and always
 *	returns a valid gid.  This makes from_kgid_munged appropriate
 *	for use in syscalls like stat and getgid where failing the
 *	system call and failing to provide a valid gid are not options.
 *
 *	If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
	gid_t gid = from_kgid(targ, kgid);

	/* Substitute the overflow id for "no mapping" */
	return (gid == (gid_t) -1) ? overflowgid : gid;
}
EXPORT_SYMBOL(from_kgid_munged);
2012-08-30 12:24:05 +04:00
/**
 *	make_kprojid - Map a user-namespace projid pair into a kprojid.
 *	@ns:     User namespace that the projid is in
 *	@projid: Project identifier
 *
 *	Looks @projid up in the projid map of @ns and returns the
 *	resulting kernel internal kprojid.
 *
 *	When there is no mapping defined for the user-namespace projid
 *	pair INVALID_PROJID is returned.  Callers are expected to test
 *	for and handle INVALID_PROJID being returned.  INVALID_PROJID
 *	may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Translate the projid into the global kernel id space */
	u32 kernel_id = map_id_down(&ns->projid_map, projid);

	return KPROJIDT_INIT(kernel_id);
}
EXPORT_SYMBOL(make_kprojid);
/**
 *	from_kprojid - Create a projid from a kprojid user-namespace pair.
 *	@targ:    The user namespace we want a projid in.
 *	@kprojid: The kernel internal project identifier to start with.
 *
 *	Looks @kprojid up in the projid map of @targ and returns the
 *	projid it corresponds to inside that namespace.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Start from the raw global kernel projid */
	u32 kernel_id = __kprojid_val(kprojid);

	return map_id_up(&targ->projid_map, kernel_id);
}
EXPORT_SYMBOL(from_kprojid);
/**
 *	from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 *	@targ:    The user namespace we want a projid in.
 *	@kprojid: The kernel internal projid to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kprojid, from_kprojid_munged never fails and always
 *	returns a valid projid.  This makes from_kprojid_munged
 *	appropriate for use in syscalls like stat, where failing the
 *	system call and failing to provide a valid projid are not
 *	options.
 *
 *	If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
	projid_t projid = from_kprojid(targ, kprojid);

	/* Substitute the overflow id for "no mapping" */
	return (projid == (projid_t) -1) ? OVERFLOW_PROJID : projid;
}
EXPORT_SYMBOL(from_kprojid_munged);
2011-11-17 12:11:58 +04:00
static int uid_m_show ( struct seq_file * seq , void * v )
{
struct user_namespace * ns = seq - > private ;
struct uid_gid_extent * extent = v ;
struct user_namespace * lower_ns ;
uid_t lower ;
2010-06-13 07:28:03 +04:00
2012-08-15 08:25:13 +04:00
lower_ns = seq_user_ns ( seq ) ;
2011-11-17 12:11:58 +04:00
if ( ( lower_ns = = ns ) & & lower_ns - > parent )
lower_ns = lower_ns - > parent ;
lower = from_kuid ( lower_ns , KUIDT_INIT ( extent - > lower_first ) ) ;
seq_printf ( seq , " %10u %10u %10u \n " ,
extent - > first ,
lower ,
extent - > count ) ;
return 0 ;
2010-06-13 07:28:03 +04:00
}
2011-11-17 12:11:58 +04:00
static int gid_m_show ( struct seq_file * seq , void * v )
2010-06-13 07:28:03 +04:00
{
2011-11-17 12:11:58 +04:00
struct user_namespace * ns = seq - > private ;
struct uid_gid_extent * extent = v ;
struct user_namespace * lower_ns ;
gid_t lower ;
2010-06-13 07:28:03 +04:00
2012-08-15 08:25:13 +04:00
lower_ns = seq_user_ns ( seq ) ;
2011-11-17 12:11:58 +04:00
if ( ( lower_ns = = ns ) & & lower_ns - > parent )
lower_ns = lower_ns - > parent ;
2010-06-13 07:28:03 +04:00
2011-11-17 12:11:58 +04:00
lower = from_kgid ( lower_ns , KGIDT_INIT ( extent - > lower_first ) ) ;
seq_printf ( seq , " %10u %10u %10u \n " ,
extent - > first ,
lower ,
extent - > count ) ;
return 0 ;
}
2012-08-30 12:24:05 +04:00
static int projid_m_show ( struct seq_file * seq , void * v )
{
struct user_namespace * ns = seq - > private ;
struct uid_gid_extent * extent = v ;
struct user_namespace * lower_ns ;
projid_t lower ;
lower_ns = seq_user_ns ( seq ) ;
if ( ( lower_ns = = ns ) & & lower_ns - > parent )
lower_ns = lower_ns - > parent ;
lower = from_kprojid ( lower_ns , KPROJIDT_INIT ( extent - > lower_first ) ) ;
seq_printf ( seq , " %10u %10u %10u \n " ,
extent - > first ,
lower ,
extent - > count ) ;
return 0 ;
}
2014-06-07 01:37:21 +04:00
/*
 * m_start - common ->start helper for the id map seq_files.
 * @seq:  unused
 * @ppos: position, used as an index into the extents of @map
 * @map:  map being iterated
 *
 * Returns the extent at *@ppos, or NULL once the position is not below
 * the number of extents in @map.
 */
static void *m_start(struct seq_file *seq, loff_t *ppos,
		     struct uid_gid_map *map)
{
	loff_t pos = *ppos;

	if (pos >= map->nr_extents)
		return NULL;

	return &map->extent[pos];
}
/* ->start for the uid map: iterate the uid_map of the namespace in seq->private. */
static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *shown_ns = seq->private;
	struct uid_gid_map *map = &shown_ns->uid_map;

	return m_start(seq, ppos, map);
}
/* ->start for the gid map: iterate the gid_map of the namespace in seq->private. */
static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *shown_ns = seq->private;
	struct uid_gid_map *map = &shown_ns->gid_map;

	return m_start(seq, ppos, map);
}
2012-08-30 12:24:05 +04:00
/* ->start for the projid map: iterate the projid_map of the namespace in seq->private. */
static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *shown_ns = seq->private;
	struct uid_gid_map *map = &shown_ns->projid_map;

	return m_start(seq, ppos, map);
}
2011-11-17 12:11:58 +04:00
/*
 * m_next - shared ->next for the id map seq_files.
 *
 * Advances the position by one and asks the seq_file's own ->start for the
 * element at the new position, so it works for all three map types.
 */
static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return seq->op->start(seq, pos);
}
/* Shared ->stop for the id map seq_files; it does nothing. */
static void m_stop(struct seq_file *seq, void *v)
{
}
2014-08-09 01:21:22 +04:00
const struct seq_operations proc_uid_seq_operations = {
2011-11-17 12:11:58 +04:00
. start = uid_m_start ,
. stop = m_stop ,
. next = m_next ,
. show = uid_m_show ,
} ;
2014-08-09 01:21:22 +04:00
const struct seq_operations proc_gid_seq_operations = {
2011-11-17 12:11:58 +04:00
. start = gid_m_start ,
. stop = m_stop ,
. next = m_next ,
. show = gid_m_show ,
} ;
2014-08-09 01:21:22 +04:00
const struct seq_operations proc_projid_seq_operations = {
2012-08-30 12:24:05 +04:00
. start = projid_m_start ,
. stop = m_stop ,
. next = m_next ,
. show = projid_m_show ,
} ;
2014-06-07 01:37:21 +04:00
static bool mappings_overlap ( struct uid_gid_map * new_map ,
struct uid_gid_extent * extent )
2012-12-28 10:27:29 +04:00
{
u32 upper_first , lower_first , upper_last , lower_last ;
unsigned idx ;
upper_first = extent - > first ;
lower_first = extent - > lower_first ;
upper_last = upper_first + extent - > count - 1 ;
lower_last = lower_first + extent - > count - 1 ;
for ( idx = 0 ; idx < new_map - > nr_extents ; idx + + ) {
u32 prev_upper_first , prev_lower_first ;
u32 prev_upper_last , prev_lower_last ;
struct uid_gid_extent * prev ;
prev = & new_map - > extent [ idx ] ;
prev_upper_first = prev - > first ;
prev_lower_first = prev - > lower_first ;
prev_upper_last = prev_upper_first + prev - > count - 1 ;
prev_lower_last = prev_lower_first + prev - > count - 1 ;
/* Does the upper range intersect a previous extent? */
if ( ( prev_upper_first < = upper_last ) & &
( prev_upper_last > = upper_first ) )
return true ;
/* Does the lower range intersect a previous extent? */
if ( ( prev_lower_first < = lower_last ) & &
( prev_lower_last > = lower_first ) )
return true ;
}
return false ;
}
2011-11-17 12:11:58 +04:00
/*
 * map_write - parse and install an id map written to one of the map files.
 * @file:       file being written; its seq_file private data is the user
 *              namespace whose map is being set
 * @buf:        user buffer with one "first lower_first count" triple of
 *              decimal numbers per line
 * @count:      number of bytes in @buf; must be smaller than PAGE_SIZE
 * @ppos:       file position; must be 0
 * @cap_setid:  capability guarding this kind of map, or a value for which
 *              cap_valid() is false when no capability is needed
 * @map:        map to fill in; must not contain any extents yet
 * @parent_map: map used to translate the written lower ids into the kernel
 *              global id space
 *
 * Returns @count on success, otherwise -EPERM, -ENOMEM, -EINVAL or -EFAULT.
 */
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	struct uid_gid_extent *extent;
	unsigned long page = 0;
	char *kbuf, *cur, *next_line;
	unsigned i;
	ssize_t rc;

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't have crazy
	 * architectures returning stale data.
	 */
	mutex_lock(&userns_state_mutex);

	/* Only allow one successful write to the map */
	rc = -EPERM;
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
		goto out;

	/* Get a buffer */
	rc = -ENOMEM;
	page = __get_free_page(GFP_TEMPORARY);
	kbuf = (char *) page;
	if (!page)
		goto out;

	/* Only allow < page size writes at the beginning of the file */
	rc = -EINVAL;
	if (*ppos != 0 || count >= PAGE_SIZE)
		goto out;

	/* Slurp in the user data */
	rc = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';

	/* Parse the user data, one extent per line */
	rc = -EINVAL;
	new_map.nr_extents = 0;
	cur = kbuf;
	while (cur) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(cur, '\n');
		if (next_line) {
			*next_line++ = '\0';
			if (*next_line == '\0')
				next_line = NULL;
		}

		cur = skip_spaces(cur);
		extent->first = simple_strtoul(cur, &cur, 10);
		if (!isspace(*cur))
			goto out;

		cur = skip_spaces(cur);
		extent->lower_first = simple_strtoul(cur, &cur, 10);
		if (!isspace(*cur))
			goto out;

		cur = skip_spaces(cur);
		extent->count = simple_strtoul(cur, &cur, 10);
		if (*cur && !isspace(*cur))
			goto out;

		/* Verify there is no trailing junk on the line */
		cur = skip_spaces(cur);
		if (*cur != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if (extent->first == (u32) -1 ||
		    extent->lower_first == (u32) -1)
			goto out;

		/*
		 * Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <=
		     extent->lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, extent))
			goto out;

		new_map.nr_extents++;

		/* Fail if the file contains too many extents */
		if (new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS &&
		    next_line != NULL)
			goto out;

		cur = next_line;
	}

	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	/* Validate the user is allowed to use the user ids mapped to. */
	rc = -EPERM;
	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
		goto out;

	/*
	 * Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (i = 0; i < new_map.nr_extents; i++) {
		u32 kernel_first;

		extent = &new_map.extent[i];
		kernel_first = map_id_range_down(parent_map,
						 extent->lower_first,
						 extent->count);

		/*
		 * Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (kernel_first == (u32) -1)
			goto out;

		extent->lower_first = kernel_first;
	}

	/* Install the map: extents first, then the count that publishes them */
	memcpy(map->extent, new_map.extent,
	       new_map.nr_extents * sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	rc = count;
out:
	mutex_unlock(&userns_state_mutex);
	if (page)
		free_page(page);
	return rc;
}
2014-06-07 01:37:21 +04:00
/*
 * proc_uid_map_write - write handler that sets a namespace's uid map.
 *
 * The target namespace comes from the seq_file private data.  The write is
 * refused with -EPERM when the target has no parent, or when the namespace
 * returned by seq_user_ns() is neither the target nor its parent.
 * Otherwise the work is done by map_write() with CAP_SETUID.
 */
ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);
	struct user_namespace *parent = ns->parent;

	if (!parent)
		return -EPERM;

	if (seq_ns != ns && seq_ns != parent)
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETUID,
			 &ns->uid_map, &parent->uid_map);
}
2014-06-07 01:37:21 +04:00
/*
 * proc_gid_map_write - write handler that sets a namespace's gid map.
 *
 * The target namespace comes from the seq_file private data.  The write is
 * refused with -EPERM when the target has no parent, or when the namespace
 * returned by seq_user_ns() is neither the target nor its parent.
 * Otherwise the work is done by map_write() with CAP_SETGID.
 */
ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);
	struct user_namespace *parent = ns->parent;

	if (!parent)
		return -EPERM;

	if (seq_ns != ns && seq_ns != parent)
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETGID,
			 &ns->gid_map, &parent->gid_map);
}
2014-06-07 01:37:21 +04:00
/*
 * proc_projid_map_write - write handler that sets a namespace's projid map.
 *
 * The target namespace comes from the seq_file private data.  The write is
 * refused with -EPERM when the target has no parent, or when the namespace
 * returned by seq_user_ns() is neither the target nor its parent.
 * Otherwise the work is done by map_write() with -1 as the capability,
 * i.e. no capability is needed.
 */
ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
			      size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);
	struct user_namespace *parent = ns->parent;

	if (!parent)
		return -EPERM;

	if (seq_ns != ns && seq_ns != parent)
		return -EPERM;

	/* Anyone can set any valid project id, no capability needed */
	return map_write(file, buf, size, ppos, -1,
			 &ns->projid_map, &parent->projid_map);
}
2014-06-07 01:37:21 +04:00
static bool new_idmap_permitted ( const struct file * file ,
2013-04-15 00:47:02 +04:00
struct user_namespace * ns , int cap_setid ,
2011-11-17 12:11:58 +04:00
struct uid_gid_map * new_map )
{
2014-11-27 08:22:14 +03:00
const struct cred * cred = file - > f_cred ;
2014-12-06 02:51:47 +03:00
/* Don't allow mappings that would allow anything that wouldn't
* be allowed without the establishment of unprivileged mappings .
*/
2014-11-27 08:22:14 +03:00
if ( ( new_map - > nr_extents = = 1 ) & & ( new_map - > extent [ 0 ] . count = = 1 ) & &
uid_eq ( ns - > owner , cred - > euid ) ) {
2012-07-27 17:21:27 +04:00
u32 id = new_map - > extent [ 0 ] . lower_first ;
if ( cap_setid = = CAP_SETUID ) {
kuid_t uid = make_kuid ( ns - > parent , id ) ;
2014-11-27 08:22:14 +03:00
if ( uid_eq ( uid , cred - > euid ) )
2012-07-27 17:21:27 +04:00
return true ;
2014-06-07 01:37:21 +04:00
} else if ( cap_setid = = CAP_SETGID ) {
2012-07-27 17:21:27 +04:00
kgid_t gid = make_kgid ( ns - > parent , id ) ;
2014-12-06 04:36:04 +03:00
if ( ! ( ns - > flags & USERNS_SETGROUPS_ALLOWED ) & &
gid_eq ( gid , cred - > egid ) )
2012-07-27 17:21:27 +04:00
return true ;
}
}
2012-08-30 12:24:05 +04:00
/* Allow anyone to set a mapping that doesn't require privilege */
if ( ! cap_valid ( cap_setid ) )
return true ;
2011-11-17 12:11:58 +04:00
/* Allow the specified ids if we have the appropriate capability
* ( CAP_SETUID or CAP_SETGID ) over the parent user namespace .
2013-04-15 00:47:02 +04:00
* And the opener of the id file also had the approprpiate capability .
2011-11-17 12:11:58 +04:00
*/
2013-04-15 00:47:02 +04:00
if ( ns_capable ( ns - > parent , cap_setid ) & &
file_ns_capable ( file , ns - > parent , cap_setid ) )
2011-11-17 12:11:58 +04:00
return true ;
2010-06-13 07:28:03 +04:00
2011-11-17 12:11:58 +04:00
return false ;
2010-06-13 07:28:03 +04:00
}
2011-01-13 04:00:46 +03:00
2014-12-02 21:27:26 +03:00
int proc_setgroups_show ( struct seq_file * seq , void * v )
{
struct user_namespace * ns = seq - > private ;
unsigned long userns_flags = ACCESS_ONCE ( ns - > flags ) ;
seq_printf ( seq , " %s \n " ,
( userns_flags & USERNS_SETGROUPS_ALLOWED ) ?
" allow " : " deny " ) ;
return 0 ;
}
/*
 * proc_setgroups_write - handle a write of "allow" or "deny".
 * @file:  file being written; its seq_file private data is the namespace
 * @buf:   user buffer
 * @count: number of bytes in @buf; must be smaller than the 8 byte buffer
 * @ppos:  file position; must be 0
 *
 * "deny" clears USERNS_SETGROUPS_ALLOWED, which is only possible while the
 * gid map is still empty.  "allow" only succeeds when the flag is still
 * set, so it never turns setgroups back on.
 *
 * Returns @count on success, -EINVAL for malformed input, -EFAULT when the
 * user buffer can not be read, or -EPERM when the change is not allowed.
 */
ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	bool refused;

	/* Only allow a very narrow range of strings to be written */
	if (*ppos != 0 || count >= sizeof(kbuf))
		return -EINVAL;

	/* What was written? */
	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	} else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	} else {
		return -EINVAL;
	}

	/* Verify there is no trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		return -EINVAL;

	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/*
		 * Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		refused = !(ns->flags & USERNS_SETGROUPS_ALLOWED);
	} else {
		/*
		 * Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		refused = ns->gid_map.nr_extents != 0;
		if (!refused)
			ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	if (refused)
		return -EPERM;

	/* Report a successful write */
	*ppos = count;
	return count;
}
2014-12-06 03:01:11 +03:00
bool userns_may_setgroups ( const struct user_namespace * ns )
{
bool allowed ;
2014-12-09 23:03:14 +03:00
mutex_lock ( & userns_state_mutex ) ;
2014-12-06 03:01:11 +03:00
/* It is not safe to use setgroups until a gid mapping in
* the user namespace has been established .
*/
allowed = ns - > gid_map . nr_extents ! = 0 ;
2014-12-02 21:27:26 +03:00
/* Is setgroups allowed? */
allowed = allowed & & ( ns - > flags & USERNS_SETGROUPS_ALLOWED ) ;
2014-12-09 23:03:14 +03:00
mutex_unlock ( & userns_state_mutex ) ;
2014-12-06 03:01:11 +03:00
return allowed ;
}
2014-11-01 07:25:30 +03:00
/* Get from an embedded ns_common back to the user_namespace that contains it. */
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	struct user_namespace *user_ns;

	user_ns = container_of(ns, struct user_namespace, ns);
	return user_ns;
}
2014-11-01 07:37:32 +03:00
static struct ns_common * userns_get ( struct task_struct * task )
2012-07-26 17:24:06 +04:00
{
struct user_namespace * user_ns ;
rcu_read_lock ( ) ;
user_ns = get_user_ns ( __task_cred ( task ) - > user_ns ) ;
rcu_read_unlock ( ) ;
2014-11-01 07:25:30 +03:00
return user_ns ? & user_ns - > ns : NULL ;
2012-07-26 17:24:06 +04:00
}
2014-11-01 07:37:32 +03:00
/* ->put operation: hand the namespace behind @ns to put_user_ns(). */
static void userns_put(struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);

	put_user_ns(user_ns);
}
2014-11-01 07:37:32 +03:00
static int userns_install ( struct nsproxy * nsproxy , struct ns_common * ns )
2012-07-26 17:24:06 +04:00
{
2014-11-01 07:25:30 +03:00
struct user_namespace * user_ns = to_user_ns ( ns ) ;
2012-07-26 17:24:06 +04:00
struct cred * cred ;
/* Don't allow gaining capabilities by reentering
* the same user namespace .
*/
if ( user_ns = = current_user_ns ( ) )
return - EINVAL ;
2012-12-10 05:19:52 +04:00
/* Threaded processes may not enter a different user namespace */
2012-07-26 17:24:06 +04:00
if ( atomic_read ( & current - > mm - > mm_users ) > 1 )
return - EINVAL ;
2013-03-13 22:51:49 +04:00
if ( current - > fs - > users ! = 1 )
return - EINVAL ;
2012-07-26 17:24:06 +04:00
if ( ! ns_capable ( user_ns , CAP_SYS_ADMIN ) )
return - EPERM ;
cred = prepare_creds ( ) ;
if ( ! cred )
return - ENOMEM ;
put_user_ns ( cred - > user_ns ) ;
set_cred_user_ns ( cred , get_user_ns ( user_ns ) ) ;
return commit_creds ( cred ) ;
}
/* Namespace operations table for user namespaces ("user", CLONE_NEWUSER). */
const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.install	= userns_install,
	.get		= userns_get,
	.put		= userns_put,
};
2011-01-13 04:00:46 +03:00
/*
 * Create the slab cache for struct user_namespace.  SLAB_PANIC is passed,
 * so the result is not checked.  Always returns 0.
 */
static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);

	return 0;
}
subsys_initcall(user_namespaces_init);