2010-08-14 21:59:25 +04:00
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2010-03-31 18:29:55 +04:00
/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
# include <sys/mount.h>
# include <errno.h>
# include <sys/stat.h>
# include <stdlib.h>
# include <string.h>
# include <libgen.h>
# include <assert.h>
2010-10-27 07:47:48 +04:00
# include <unistd.h>
2010-11-08 06:59:39 +03:00
# include <ftw.h>
2010-03-31 18:29:55 +04:00
# include "mount-setup.h"
2012-04-18 00:25:24 +04:00
# include "dev-setup.h"
2010-03-31 18:29:55 +04:00
# include "log.h"
2010-04-08 03:43:07 +04:00
# include "macro.h"
# include "util.h"
2010-09-15 03:38:07 +04:00
# include "label.h"
2011-08-23 02:37:35 +04:00
# include "set.h"
# include "strv.h"
2012-04-10 23:54:31 +04:00
# include "mkdir.h"
2012-05-07 23:36:12 +04:00
# include "path-util.h"
2012-06-01 23:25:29 +04:00
# include "missing.h"
2012-09-18 14:05:47 +04:00
# include "virt.h"
2013-01-19 07:41:33 +04:00
# include "efivars.h"
2013-10-09 21:52:15 +04:00
# include "smack-util.h"
2014-01-20 22:54:51 +04:00
# include "def.h"
2011-01-04 03:58:38 +03:00
2012-11-04 20:03:48 +04:00
/* Per-entry behaviour flags for a MountPoint; values are bits and may
 * be OR-ed together. */
typedef enum MountMode {
        MNT_NONE         = 0,      /* no special handling */
        MNT_FATAL        = 1 << 0, /* a failed mount() is reported as an error to the caller */
        MNT_IN_CONTAINER = 1 << 1, /* entry is also mounted when running inside a container */
} MountMode;
2010-04-12 23:58:01 +04:00
typedef struct MountPoint {
const char * what ;
const char * where ;
const char * type ;
const char * options ;
unsigned long flags ;
2012-11-04 20:03:48 +04:00
bool ( * condition_fn ) ( void ) ;
MountMode mode ;
2010-04-12 23:58:01 +04:00
} MountPoint ;
2011-07-29 03:48:18 +04:00
/* The first three entries we might need before SELinux is up. The
 * fourth (securityfs) is needed by IMA to load a custom policy. With
 * N_EARLY_MOUNT set to 5 the fifth entry of mount_table is mounted
 * early as well: that is smackfs when HAVE_SMACK is defined, and the
 * plain /dev/shm tmpfs otherwise. The remaining entries we can delay
 * until SELinux and IMA are loaded. */
#define N_EARLY_MOUNT 5
2011-07-29 03:48:18 +04:00
2010-04-12 23:58:01 +04:00
static const MountPoint mount_table [ ] = {
2012-11-04 20:03:48 +04:00
{ " proc " , " /proc " , " proc " , NULL , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
{ " sysfs " , " /sys " , " sysfs " , NULL , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
{ " devtmpfs " , " /dev " , " devtmpfs " , " mode=755 " , MS_NOSUID | MS_STRICTATIME ,
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
{ " securityfs " , " /sys/kernel/security " , " securityfs " , NULL , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
NULL , MNT_NONE } ,
2013-09-27 01:41:09 +04:00
# ifdef HAVE_SMACK
2013-10-09 21:52:15 +04:00
{ " smackfs " , " /sys/fs/smackfs " , " smackfs " , " smackfsdef=* " , MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_STRICTATIME ,
use_smack , MNT_FATAL } ,
2013-09-27 01:41:09 +04:00
{ " tmpfs " , " /dev/shm " , " tmpfs " , " mode=1777,smackfsroot=* " , MS_NOSUID | MS_NODEV | MS_STRICTATIME ,
2013-10-09 21:52:15 +04:00
use_smack , MNT_FATAL } ,
2013-09-27 01:41:09 +04:00
# endif
2012-11-04 20:03:48 +04:00
{ " tmpfs " , " /dev/shm " , " tmpfs " , " mode=1777 " , MS_NOSUID | MS_NODEV | MS_STRICTATIME ,
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
{ " devpts " , " /dev/pts " , " devpts " , " mode=620,gid= " STRINGIFY ( TTY_GID ) , MS_NOSUID | MS_NOEXEC ,
NULL , MNT_IN_CONTAINER } ,
2013-09-27 01:41:09 +04:00
# ifdef HAVE_SMACK
{ " tmpfs " , " /run " , " tmpfs " , " mode=755,smackfsroot=* " , MS_NOSUID | MS_NODEV | MS_STRICTATIME ,
2013-10-09 21:52:15 +04:00
use_smack , MNT_FATAL } ,
2013-09-27 01:41:09 +04:00
# endif
2012-11-04 20:03:48 +04:00
{ " tmpfs " , " /run " , " tmpfs " , " mode=755 " , MS_NOSUID | MS_NODEV | MS_STRICTATIME ,
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
{ " tmpfs " , " /sys/fs/cgroup " , " tmpfs " , " mode=755 " , MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_STRICTATIME ,
2014-05-05 20:49:43 +04:00
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
2013-04-23 04:39:03 +04:00
# ifdef HAVE_XATTR
{ " cgroup " , " /sys/fs/cgroup/systemd " , " cgroup " , " none,name=systemd,xattr " , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
2014-05-05 20:49:43 +04:00
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
2013-04-23 04:39:03 +04:00
# endif
{ " cgroup " , " /sys/fs/cgroup/systemd " , " cgroup " , " none,name=systemd " , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
2014-05-05 20:49:43 +04:00
NULL , MNT_FATAL | MNT_IN_CONTAINER } ,
2013-02-27 16:07:09 +04:00
{ " pstore " , " /sys/fs/pstore " , " pstore " , NULL , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
NULL , MNT_NONE } ,
# ifdef ENABLE_EFI
{ " efivarfs " , " /sys/firmware/efi/efivars " , " efivarfs " , NULL , MS_NOSUID | MS_NOEXEC | MS_NODEV ,
is_efi_boot , MNT_NONE } ,
# endif
2010-03-31 18:29:55 +04:00
} ;
2010-07-16 04:56:57 +04:00
/* These are API file systems that might be mounted by other software,
 * we just list them here so that we know that we should ignore them.
 *
 * The list is a sequence of NUL-terminated strings; the implicit
 * terminator of the final literal yields the empty string that ends
 * the list. */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"
        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"
        /* Legacy kernel file system */
        "/proc/bus/usb\0"
        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
2010-07-16 04:56:57 +04:00
2010-04-10 19:42:00 +04:00
bool mount_point_is_api ( const char * path ) {
unsigned i ;
/* Checks if this mount point is considered "API", and hence
* should be ignored */
2010-04-12 23:58:01 +04:00
for ( i = 0 ; i < ELEMENTSOF ( mount_table ) ; i + + )
2010-08-20 05:26:15 +04:00
if ( path_equal ( path , mount_table [ i ] . where ) )
2010-04-10 19:42:00 +04:00
return true ;
2010-11-11 13:15:16 +03:00
return path_startswith ( path , " /sys/fs/cgroup/ " ) ;
}
bool mount_point_ignore ( const char * path ) {
2012-04-22 03:00:57 +04:00
const char * i ;
2010-11-11 13:15:16 +03:00
2012-04-22 03:00:57 +04:00
NULSTR_FOREACH ( i , ignore_paths )
if ( path_equal ( path , i ) )
2010-07-16 04:56:57 +04:00
return true ;
2010-11-11 13:15:16 +03:00
return false ;
2010-04-10 19:42:00 +04:00
}
2011-07-29 03:48:18 +04:00
static int mount_one ( const MountPoint * p , bool relabel ) {
2010-03-31 18:29:55 +04:00
int r ;
2010-04-12 23:58:01 +04:00
assert ( p ) ;
2010-03-31 18:29:55 +04:00
2012-11-04 20:03:48 +04:00
if ( p - > condition_fn & & ! p - > condition_fn ( ) )
return 0 ;
2011-04-07 01:38:01 +04:00
/* Relabel first, just in case */
2011-07-29 03:48:18 +04:00
if ( relabel )
2012-07-03 18:25:50 +04:00
label_fix ( p - > where , true , true ) ;
2011-04-07 01:38:01 +04:00
2012-09-18 14:05:47 +04:00
r = path_is_mount_point ( p - > where , true ) ;
if ( r < 0 )
2010-03-31 18:29:55 +04:00
return r ;
if ( r > 0 )
2011-04-07 01:38:01 +04:00
return 0 ;
2010-03-31 18:29:55 +04:00
2012-09-18 14:05:47 +04:00
/* Skip securityfs in a container */
2012-11-04 20:03:48 +04:00
if ( ! ( p - > mode & MNT_IN_CONTAINER ) & & detect_container ( NULL ) > 0 )
2012-09-18 14:05:47 +04:00
return 0 ;
2010-04-06 23:55:58 +04:00
/* The access mode here doesn't really matter too much, since
* the mounted file system will take precedence anyway . */
2014-03-25 00:04:02 +04:00
if ( relabel )
mkdir_p_label ( p - > where , 0755 ) ;
else
mkdir_p ( p - > where , 0755 ) ;
2010-04-06 23:55:58 +04:00
2010-03-31 18:29:55 +04:00
log_debug ( " Mounting %s to %s of type %s with options %s. " ,
2010-04-12 23:58:01 +04:00
p - > what ,
p - > where ,
p - > type ,
strna ( p - > options ) ) ;
if ( mount ( p - > what ,
p - > where ,
p - > type ,
p - > flags ,
p - > options ) < 0 ) {
2014-05-05 20:49:43 +04:00
log_full ( ( p - > mode & MNT_FATAL ) ? LOG_ERR : LOG_DEBUG , " Failed to mount %s at %s: %m " , p - > type , p - > where ) ;
2012-11-04 20:03:48 +04:00
return ( p - > mode & MNT_FATAL ) ? - errno : 0 ;
2010-03-31 18:29:55 +04:00
}
2011-04-07 01:38:01 +04:00
/* Relabel again, since we now mounted something fresh here */
2011-07-29 03:48:18 +04:00
if ( relabel )
2012-07-03 18:25:50 +04:00
label_fix ( p - > where , false , false ) ;
2010-09-15 03:38:07 +04:00
2011-08-23 02:37:35 +04:00
return 1 ;
2010-03-31 18:29:55 +04:00
}
2011-07-29 03:48:18 +04:00
/* Mounts the first N_EARLY_MOUNT entries of mount_table without
 * relabelling. All entries are attempted even if one fails.
 *
 * Returns the first negative result of mount_one() if any entry
 * failed; otherwise a positive value if at least one file system was
 * mounted, and 0 if nothing had to be done. */
int mount_setup_early(void) {
        unsigned i;
        int r = 0;

        assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));

        /* Do a minimal mount of /proc and friends to enable the most
         * basic stuff, such as SELinux */
        for (i = 0; i < N_EARLY_MOUNT; i++) {
                int j;

                j = mount_one(mount_table + i, false);

                /* mount_one() returns 1 on success, so a plain
                 * "remember the first non-zero result" would latch on
                 * that 1 and hide a later failure. Keep updating until
                 * an error has been recorded. */
                if (j != 0 && r >= 0)
                        r = j;
        }

        return r;
}
2011-08-23 02:37:35 +04:00
int mount_cgroup_controllers ( char * * * join_controllers ) {
2013-04-23 16:28:10 +04:00
_cleanup_set_free_free_ Set * controllers = NULL ;
_cleanup_fclose_ FILE * f ;
2014-03-18 07:06:36 +04:00
char buf [ LINE_MAX ] ;
int r ;
2010-04-17 01:22:32 +04:00
2010-05-18 05:10:17 +04:00
/* Mount all available cgroup controllers that are built into the kernel. */
2010-04-17 01:22:32 +04:00
2011-08-23 02:37:35 +04:00
f = fopen ( " /proc/cgroups " , " re " ) ;
if ( ! f ) {
2011-04-26 23:12:36 +04:00
log_error ( " Failed to enumerate cgroup controllers: %m " ) ;
return 0 ;
}
2010-04-17 01:22:32 +04:00
2011-08-23 02:37:35 +04:00
controllers = set_new ( string_hash_func , string_compare_func ) ;
2013-04-23 16:28:10 +04:00
if ( ! controllers )
return log_oom ( ) ;
2011-08-23 02:37:35 +04:00
2010-04-17 01:22:32 +04:00
/* Ignore the header line */
2010-05-10 05:34:31 +04:00
( void ) fgets ( buf , sizeof ( buf ) , f ) ;
2010-04-17 01:22:32 +04:00
for ( ; ; ) {
2011-08-23 02:37:35 +04:00
char * controller ;
int enabled = 0 ;
2010-04-17 01:22:32 +04:00
2010-11-22 13:06:38 +03:00
if ( fscanf ( f , " %ms %*i %*i %i " , & controller , & enabled ) ! = 2 ) {
2010-04-17 01:22:32 +04:00
if ( feof ( f ) )
break ;
log_error ( " Failed to parse /proc/cgroups. " ) ;
2013-04-23 16:28:10 +04:00
return - EIO ;
2010-04-17 01:22:32 +04:00
}
2010-11-22 00:29:10 +03:00
if ( ! enabled ) {
free ( controller ) ;
continue ;
}
2013-04-23 07:12:15 +04:00
r = set_consume ( controllers , controller ) ;
2011-08-23 02:37:35 +04:00
if ( r < 0 ) {
log_error ( " Failed to add controller to set. " ) ;
2013-04-23 16:28:10 +04:00
return r ;
2011-08-23 02:37:35 +04:00
}
}
for ( ; ; ) {
2014-03-18 07:06:36 +04:00
_cleanup_free_ char * options = NULL , * controller = NULL , * where = NULL ;
2013-04-23 16:28:10 +04:00
MountPoint p = {
. what = " cgroup " ,
. type = " cgroup " ,
. flags = MS_NOSUID | MS_NOEXEC | MS_NODEV ,
. mode = MNT_IN_CONTAINER ,
} ;
2011-08-23 02:37:35 +04:00
char * * * k = NULL ;
controller = set_steal_first ( controllers ) ;
if ( ! controller )
break ;
if ( join_controllers )
for ( k = join_controllers ; * k ; k + + )
if ( strv_find ( * k , controller ) )
break ;
if ( k & & * k ) {
char * * i , * * j ;
for ( i = * k , j = * k ; * i ; i + + ) {
if ( ! streq ( * i , controller ) ) {
2014-03-18 07:06:36 +04:00
_cleanup_free_ char * t ;
2011-08-23 02:37:35 +04:00
t = set_remove ( controllers , * i ) ;
if ( ! t ) {
free ( * i ) ;
continue ;
}
}
* ( j + + ) = * i ;
}
* j = NULL ;
options = strv_join ( * k , " , " ) ;
2013-04-23 16:28:10 +04:00
if ( ! options )
return log_oom ( ) ;
2011-08-23 02:37:35 +04:00
} else {
options = controller ;
controller = NULL ;
}
2014-03-18 07:06:36 +04:00
where = strappend ( " /sys/fs/cgroup/ " , options ) ;
if ( ! where )
return log_oom ( ) ;
p . where = where ;
2011-08-23 02:37:35 +04:00
p . options = options ;
2010-04-17 01:22:32 +04:00
2011-07-29 03:48:18 +04:00
r = mount_one ( & p , true ) ;
2013-04-23 16:28:10 +04:00
if ( r < 0 )
return r ;
2011-08-23 02:37:35 +04:00
if ( r > 0 & & k & & * k ) {
char * * i ;
for ( i = * k ; * i ; i + + ) {
2014-03-18 07:06:36 +04:00
_cleanup_free_ char * t = NULL ;
t = strappend ( " /sys/fs/cgroup/ " , * i ) ;
if ( ! t )
return log_oom ( ) ;
2011-08-23 02:37:35 +04:00
r = symlink ( options , t ) ;
if ( r < 0 & & errno ! = EEXIST ) {
2013-04-23 04:51:29 +04:00
log_error ( " Failed to create symlink %s: %m " , t ) ;
2013-04-23 16:28:10 +04:00
return - errno ;
2011-08-23 02:37:35 +04:00
}
}
}
2010-04-17 01:22:32 +04:00
}
2014-03-18 07:06:54 +04:00
/* Now that we mounted everything, let's make the tmpfs the
* cgroup file systems are mounted into read - only . */
mount ( " tmpfs " , " /sys/fs/cgroup " , " tmpfs " , MS_REMOUNT | MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_STRICTATIME | MS_RDONLY , " mode=755 " ) ;
2013-04-23 16:28:10 +04:00
return 0 ;
2010-04-17 01:22:32 +04:00
}
2010-11-08 06:59:39 +03:00
static int nftw_cb (
const char * fpath ,
const struct stat * sb ,
int tflag ,
struct FTW * ftwbuf ) {
2010-11-11 03:22:42 +03:00
/* No need to label /dev twice in a row... */
2011-08-30 02:16:00 +04:00
if ( _unlikely_ ( ftwbuf - > level = = 0 ) )
return FTW_CONTINUE ;
2012-07-03 18:25:50 +04:00
label_fix ( fpath , false , false ) ;
2011-08-30 20:49:17 +04:00
2011-08-30 02:16:00 +04:00
/* /run/initramfs is static data and big, no need to
2011-08-30 20:49:17 +04:00
* dynamically relabel its contents at boot . . . */
2011-08-30 02:16:00 +04:00
if ( _unlikely_ ( ftwbuf - > level = = 1 & &
tflag = = FTW_D & &
streq ( fpath , " /run/initramfs " ) ) )
return FTW_SKIP_SUBTREE ;
2010-11-11 03:22:42 +03:00
2011-08-30 02:16:00 +04:00
return FTW_CONTINUE ;
2010-11-08 06:59:39 +03:00
} ;
2011-07-29 01:52:23 +04:00
int mount_setup ( bool loaded_policy ) {
2010-03-31 18:29:55 +04:00
int r ;
2010-04-10 19:42:00 +04:00
unsigned i ;
2010-03-31 18:29:55 +04:00
2011-07-29 03:48:18 +04:00
for ( i = 0 ; i < ELEMENTSOF ( mount_table ) ; i + + ) {
r = mount_one ( mount_table + i , true ) ;
if ( r < 0 )
2010-03-31 18:29:55 +04:00
return r ;
2011-07-29 03:48:18 +04:00
}
2010-03-31 18:29:55 +04:00
2011-04-07 23:22:41 +04:00
/* Nodes in devtmpfs and /run need to be manually updated for
* the appropriate labels , after mounting . The other virtual
* API file systems like / sys and / proc do not need that , they
* use the same label for all their files . */
2011-07-29 01:52:23 +04:00
if ( loaded_policy ) {
usec_t before_relabel , after_relabel ;
char timespan [ FORMAT_TIMESPAN_MAX ] ;
before_relabel = now ( CLOCK_MONOTONIC ) ;
2011-08-30 02:16:00 +04:00
nftw ( " /dev " , nftw_cb , 64 , FTW_MOUNT | FTW_PHYS | FTW_ACTIONRETVAL ) ;
nftw ( " /run " , nftw_cb , 64 , FTW_MOUNT | FTW_PHYS | FTW_ACTIONRETVAL ) ;
2011-07-29 01:52:23 +04:00
after_relabel = now ( CLOCK_MONOTONIC ) ;
log_info ( " Relabelled /dev and /run in %s. " ,
2013-04-04 04:56:56 +04:00
format_timespan ( timespan , sizeof ( timespan ) , after_relabel - before_relabel , 0 ) ) ;
2011-04-04 18:56:51 +04:00
}
2010-11-08 06:59:39 +03:00
2010-10-27 07:47:48 +04:00
/* Create a few default symlinks, which are normally created
2011-04-07 23:22:41 +04:00
* by udevd , but some scripts might need them before we start
2010-10-27 07:47:48 +04:00
* udevd . */
2012-08-21 19:23:03 +04:00
dev_setup ( NULL ) ;
2010-10-27 07:47:48 +04:00
2012-08-06 20:28:42 +04:00
/* Mark the root directory as shared in regards to mount
* propagation . The kernel defaults to " private " , but we think
* it makes more sense to have a default of " shared " so that
* nspawn and the container tools work out of the box . If
* specific setups need other settings they can reset the
* propagation mode to private if needed . */
2012-09-18 14:05:47 +04:00
if ( detect_container ( NULL ) < = 0 )
if ( mount ( NULL , " / " , NULL , MS_REC | MS_SHARED , NULL ) < 0 )
log_warning ( " Failed to set up the root directory for shared mount propagation: %m " ) ;
2012-08-06 20:28:42 +04:00
sd-booted: update sd_booted() check a bit
Previously we were testing whether /sys/fs/cgroup/systemd/ was a mount
point. This might be problematic however, when the cgroup trees are bind
mounted into a container from the host (which should be absolutely
valid), which might create the impression that the container was running
systemd, but only the host actually is.
Replace this by a check for the existance of the directory
/run/systemd/system/, which should work unconditionally, since /run can
never be a bind mount but *must* be a tmpfs on systemd systems, which is
flushed at boots. This means that data in /run always reflects
information about the current boot, and only of the local container,
which makes it the perfect choice for a check like this.
(As side effect this is nice to Ubuntu people who now use logind with
the systemd cgroup hierarchy, where the old sd_booted() check misdetects
systemd, even though they still run legacy Upstart.)
2013-03-15 19:41:40 +04:00
/* Create a few directories we always want around, Note that
* sd_booted ( ) checks for / run / systemd / system , so this mkdir
* really needs to stay for good , otherwise software that
* copied sd - daemon . c into their sources will misdetect
* systemd . */
2012-05-31 14:40:20 +04:00
mkdir_label ( " /run/systemd " , 0755 ) ;
mkdir_label ( " /run/systemd/system " , 0755 ) ;
2013-03-14 21:12:27 +04:00
mkdir_label ( " /run/systemd/inaccessible " , 0000 ) ;
2011-03-10 00:45:47 +03:00
2011-08-23 02:37:35 +04:00
return 0 ;
2010-03-31 18:29:55 +04:00
}