handle capabilities and permission checks for mknod

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Wolfgang Bumiller 2019-07-10 11:35:58 +02:00
parent 3bb4df0ba5
commit 512f780a8c
5 changed files with 399 additions and 86 deletions

View File

@ -4,10 +4,10 @@ use std::convert::TryFrom;
use std::ffi::CString;
use std::os::raw::c_int;
use std::os::unix::fs::FileExt;
use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd};
use std::os::unix::io::RawFd;
use std::{io, mem};
use failure::{bail, Error};
use failure::{bail, format_err, Error};
use lazy_static::lazy_static;
use libc::pid_t;
use nix::errno::Errno;
@ -15,7 +15,7 @@ use nix::errno::Errno;
use crate::pidfd::PidFd;
use crate::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes};
use crate::socket::AsyncSeqPacketSocket;
use crate::tools::{Fd, IoVec, IoVecMut};
use crate::tools::{Fd, FromFd, IoVec, IoVecMut};
/// Seccomp notification proxy message sent by the lxc monitor.
///
@ -134,16 +134,18 @@ impl ProxyMessageBuffer {
self.set_len(size)?;
let mut fds = fds.into_iter();
self.pid_fd = fds
let pid_fd = unsafe {
PidFd::try_from_fd(
fds.next()
.ok_or_else(|| format_err!("lxc seccomp message without pidfd"))?,
)?
};
let mem_fd = fds
.next()
.map(|fd| unsafe { PidFd::from_raw_fd(fd.into_raw_fd()) });
self.mem_fd = fds
.next()
.map(|fd| unsafe { std::fs::File::from_raw_fd(fd.into_raw_fd()) });
if self.mem_fd.is_none() {
self.drop_fds();
bail!("missing file descriptors with proxied seccomp message");
}
.ok_or_else(|| format_err!("lxc seccomp message without memfd"))?;
self.pid_fd = Some(pid_fd);
self.mem_fd = Some(std::fs::File::from_fd(mem_fd));
Ok(true)
}

View File

@ -1,24 +1,64 @@
//! pidfd helper functionality
use std::ffi::{CStr, CString};
use std::io;
use std::collections::HashMap;
use std::ffi::{CStr, CString, OsStr, OsString};
use std::io::{self, BufRead, BufReader};
use std::os::raw::c_int;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
use failure::{bail, Error};
use libc::pid_t;
use crate::libc_try;
use crate::nsfd::{ns_type, NsFd};
use crate::tools::Fd;
use crate::{file_descriptor_type, libc_try};
file_descriptor_type!(PidFd);
pub struct PidFd(RawFd, pid_t);
crate::file_descriptor_impl!(PidFd);
/// User and group ids of a process.
///
/// `PidFd::get_status` fills the four uid fields from the four values on the `Uid:` line of the
/// proc `status` file (in this order) and the four gid fields from the `Gid:` line.
#[derive(Default)]
pub struct Uids {
/// Real user id (first value of the `Uid:` line).
pub ruid: libc::uid_t,
/// Effective user id (second value of the `Uid:` line).
pub euid: libc::uid_t,
/// Saved set-user-id (third value of the `Uid:` line).
pub suid: libc::uid_t,
/// File system user id (fourth value of the `Uid:` line).
pub fsuid: libc::uid_t,
/// Real group id (first value of the `Gid:` line).
pub rgid: libc::gid_t,
/// Effective group id (second value of the `Gid:` line).
pub egid: libc::gid_t,
/// Saved set-group-id (third value of the `Gid:` line).
pub sgid: libc::gid_t,
/// File system group id (fourth value of the `Gid:` line).
pub fsgid: libc::gid_t,
}
/// Capability sets of a process, one 64 bit mask per set.
///
/// `PidFd::get_status` parses these from the hexadecimal `CapInh:`, `CapPrm:` and `CapEff:`
/// lines of the proc `status` file.
#[derive(Clone, Default)]
pub struct Capabilities {
/// Value of the `CapInh:` line.
inheritable: u64,
/// Value of the `CapPrm:` line.
permitted: u64,
/// Value of the `CapEff:` line.
effective: u64,
//bounding: u64, // we don't care currently
}
/// The subset of a process' proc `status` file collected by `PidFd::get_status`.
#[derive(Default)]
pub struct ProcStatus {
/// Values of the `Uid:` and `Gid:` lines.
uids: Uids,
/// Values of the `CapInh:`, `CapPrm:` and `CapEff:` lines.
capabilities: Capabilities,
/// Octal value of the `Umask:` line; `get_status` starts from `0o022` and only overwrites it
/// when such a line is present.
umask: libc::mode_t,
}
impl PidFd {
pub fn open(pid: libc::pid_t) -> io::Result<Self> {
pub fn open(pid: pid_t) -> io::Result<Self> {
let path = CString::new(format!("/proc/{}", pid)).unwrap();
let fd =
libc_try!(unsafe { libc::open(path.as_ptr(), libc::O_DIRECTORY | libc::O_CLOEXEC) });
Ok(Self(fd))
Ok(Self(fd, pid))
}
/// Wraps an already opened file descriptor and looks up the pid it belongs to.
///
/// The pid is obtained through `read_pid()`, i.e. from the `Pid:` line of the `status` file
/// opened relative to `fd`. Any error from that lookup is returned to the caller.
///
/// # Safety
///
/// NOTE(review): presumably `fd` has to refer to a `/proc/<pid>` directory (as `open()` would
/// produce) and must not be owned by anything else - confirm against the callers.
pub unsafe fn try_from_fd(fd: Fd) -> io::Result<Self> {
    // The pid is not known before `status` has been read, so start out with a placeholder.
    let mut pidfd = Self(fd.into_raw_fd(), -1 as pid_t);
    pidfd.1 = pidfd.read_pid()?;
    Ok(pidfd)
}
pub fn mount_namespace(&self) -> io::Result<NsFd<ns_type::Mount>> {
@ -51,74 +91,334 @@ impl PidFd {
}
pub fn fd_cwd(&self) -> io::Result<Fd> {
self.fd(unsafe { CStr::from_bytes_with_nul_unchecked(b"cwd\0") }, libc::O_DIRECTORY, 0)
self.fd(
unsafe { CStr::from_bytes_with_nul_unchecked(b"cwd\0") },
libc::O_DIRECTORY,
0,
)
}
pub fn fd_num(&self, num: RawFd, flags: c_int) -> io::Result<Fd> {
let path = format!("fd/{}\0", num);
self.fd(unsafe { CStr::from_bytes_with_nul_unchecked(path.as_bytes()) }, flags, 0)
self.fd(
unsafe { CStr::from_bytes_with_nul_unchecked(path.as_bytes()) },
flags,
0,
)
}
//pub fn dup(&self) -> io::Result<Self> {
// Ok(Self(libc_try!(unsafe {
// libc::fcntl(self.as_raw_fd(), libc::F_DUPFD_CLOEXEC, 0)
// })))
//}
pub fn chroot(&self) -> io::Result<()> {
libc_try!(unsafe { libc::fchdir(self.as_raw_fd()) });
libc_try!(unsafe { libc::chroot(b"root\0".as_ptr() as *const _) });
libc_try!(unsafe { libc::chdir(b"/\0".as_ptr() as *const _) });
Ok(())
}
/// Opens `path` via `self.fd()` and hands the resulting descriptor to a `std::fs::File`.
///
/// `flags` and `mode` are passed through to `self.fd()` unchanged.
// procfs files cannot be async, we cannot add them to epoll...
pub fn open_file(&self, path: &CStr, flags: c_int, mode: c_int) -> io::Result<std::fs::File> {
    let raw = self.fd(path, flags, mode)?.into_raw_fd();
    Ok(unsafe { std::fs::File::from_raw_fd(raw) })
}
pub fn get_euid_egid(&self) -> io::Result<(libc::uid_t, libc::gid_t)> {
use io::BufRead;
let reader = io::BufReader::new(self.open_file(
unsafe { CStr::from_bytes_with_nul_unchecked(b"status\0") },
#[inline]
fn open_buffered(&self, path: &CStr) -> io::Result<impl BufRead> {
Ok(BufReader::new(self.open_file(
path,
libc::O_RDONLY | libc::O_CLOEXEC,
0,
)?);
)?))
}
let mut uid = None;
let mut gid = None;
/// Returns the pid stored next to the file descriptor (the second tuple field), as set by
/// `open()` or `try_from_fd()`.
#[inline]
pub fn get_pid(&self) -> pid_t {
self.1
}
/// Reads the process id from the `Pid:` line of the `status` file below this descriptor.
///
/// Returns an `Other` error when the `Pid:` line has no value or the value does not parse as
/// a `pid_t`, and a `NotFound` error when the file contains no `Pid:` line at all. Read errors
/// are passed through.
fn read_pid(&self) -> io::Result<pid_t> {
    let status =
        self.open_buffered(unsafe { CStr::from_bytes_with_nul_unchecked(b"status\0") })?;

    for entry in status.lines() {
        let entry = entry?;
        let mut fields = entry.split_ascii_whitespace();
        if fields.next() != Some("Pid:") {
            continue;
        }

        // The first `Pid:` line decides the outcome, whether it is usable or not.
        return match fields.next() {
            None => Err(io::Error::new(
                io::ErrorKind::Other,
                "bad 'Pid:' line in proc",
            )),
            Some(text) => text.parse::<pid_t>().map_err(|_| {
                io::Error::new(io::ErrorKind::Other, "failed to parse pid from proc")
            }),
        };
    }

    Err(io::Error::from(io::ErrorKind::NotFound))
}
pub fn get_status(&self) -> io::Result<ProcStatus> {
let reader =
self.open_buffered(unsafe { CStr::from_bytes_with_nul_unchecked(b"status\0") })?;
#[inline]
fn check_uid_gid(value: Option<&str>) -> io::Result<libc::uid_t> {
value
.ok_or_else(|| io::Error::new(io::ErrorKind::Other, "bad 'Uid/Gid:' line in proc"))?
.parse::<libc::uid_t>()
.map_err(|_| io::Error::new(io::ErrorKind::Other, "failed to parse uid from proc"))
}
#[inline]
fn check_u64_hex(value: Option<&str>) -> io::Result<u64> {
Ok(u64::from_str_radix(
value.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "bad numeric property line in proc")
})?,
16,
)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?)
}
#[inline]
fn check_u32_oct(value: Option<&str>) -> io::Result<u32> {
Ok(u32::from_str_radix(
value.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "bad numeric property line in proc")
})?,
8,
)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?)
}
let mut ids = Uids::default();
let mut caps = Capabilities::default();
let mut umask = 0o022;
for line in reader.lines() {
let line = line?;
let mut parts = line.split_ascii_whitespace();
match parts.next() {
Some("Uid:") => {
uid = Some(parts
.skip(1)
.next()
.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "bad 'Uid:' line in proc")
})?
.parse::<libc::uid_t>()
.map_err(|_| {
io::Error::new(io::ErrorKind::Other, "failed to parse uid from proc")
})?
);
ids.ruid = check_uid_gid(parts.next())?;
ids.euid = check_uid_gid(parts.next())?;
ids.suid = check_uid_gid(parts.next())?;
ids.fsuid = check_uid_gid(parts.next())?;
}
Some("Gid:") => {
gid = Some(parts
.skip(1)
.next()
.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "bad 'Uid:' line in proc")
})?
.parse::<libc::gid_t>()
.map_err(|_| {
io::Error::new(io::ErrorKind::Other, "failed to parse gid from proc")
})?
);
ids.rgid = check_uid_gid(parts.next())?;
ids.egid = check_uid_gid(parts.next())?;
ids.sgid = check_uid_gid(parts.next())?;
ids.fsgid = check_uid_gid(parts.next())?;
}
Some("CapInh:") => caps.inheritable = check_u64_hex(parts.next())?,
Some("CapPrm:") => caps.permitted = check_u64_hex(parts.next())?,
Some("CapEff:") => caps.effective = check_u64_hex(parts.next())?,
//Some("CapBnd:") => caps.bounding = check_u64_hex(parts.next())?,
Some("Umask:") => umask = check_u32_oct(parts.next())?,
_ => continue,
}
if let (Some(u), Some(g)) = (uid, gid) {
return Ok((u, g));
}
}
Err(io::ErrorKind::InvalidData.into())
Ok(ProcStatus {
uids: ids,
capabilities: caps,
umask,
})
}
/// Parses the `cgroup` file below this descriptor into a [`CGroups`] value.
///
/// Every line has to consist of three `:` separated fields; the third field may itself
/// contain further `:` characters. The second field is a comma separated list of names: each
/// name is recorded in the v1 map with the line's path. A line with an empty second field
/// provides the v2 path instead.
///
/// Fails on read errors, on lines with fewer than three fields and on names which are not
/// valid UTF-8.
pub fn get_cgroups(&self) -> Result<CGroups, Error> {
    let file = self.open_buffered(unsafe { CStr::from_bytes_with_nul_unchecked(b"cgroup\0") })?;

    let mut result = CGroups::new();

    for raw in file.split(b'\n') {
        let raw = raw?;
        let mut fields = raw.splitn(3, |byte| *byte == b':');

        let (names, path) = match (fields.next(), fields.next(), fields.next(), fields.next()) {
            (Some(_), Some(names), Some(path), None) => (names, path),
            _ => bail!("failed to parse cgroup line: {:?}", raw),
        };

        let names = String::from_utf8(names.to_vec())?;
        let path = OsString::from_vec(path.to_vec());

        if names.is_empty() {
            result.v2 = Some(path);
            continue;
        }

        for name in names.split(',') {
            result.v1.insert(name.to_string(), path.clone());
        }
    }

    Ok(result)
}
/// Collects the permission-check environment of this process by delegating to
/// `UserCaps::new(self)`.
pub fn user_caps(&self) -> Result<UserCaps, Error> {
UserCaps::new(self)
}
}
/// Cgroup paths of a process, keyed by hierarchy.
pub struct CGroups {
    /// Path per named (v1) entry.
    v1: HashMap<String, OsString>,
    /// Path of the unnamed (v2) entry, if there is one.
    v2: Option<OsString>,
}

impl CGroups {
    /// Creates an empty set without any v1 entries and without a v2 path.
    fn new() -> Self {
        CGroups {
            v2: None,
            v1: HashMap::new(),
        }
    }

    /// Looks up the path recorded for the v1 entry called `name`.
    pub fn get(&self, name: &str) -> Option<&OsStr> {
        self.v1.get(name).map(OsString::as_os_str)
    }

    /// Returns the v2 path, if one was recorded.
    pub fn v2(&self) -> Option<&OsStr> {
        let path = self.v2.as_ref()?;
        Some(path.as_os_str())
    }
}
// Too lazy to bindgen libcap stuff...
const CAPABILITY_VERSION_3: u32 = 0x20080522;
/// Represents process capabilities.
///
/// This can be used to change the process' capability sets (if permitted by the kernel).
impl Capabilities {
    // We currently don't implement capget as it takes a pid which is racy on kernels without pidfd
    // support. Later on we might support a `capget(&PidFd)` method?

    /// Change our process capabilities. This does not include the bounding set.
    ///
    /// Each 64 bit set is split into two 32 bit halves: the first `Data` element carries the
    /// low halves, the second one the high halves.
    ///
    /// # Errors
    ///
    /// A failing `capset` syscall is reported through `libc_try!`.
    pub fn capset(&self) -> io::Result<()> {
        // The fields below are only ever read by the kernel, never by Rust code.
        #![allow(dead_code)]

        // kernel abi: these structs are read by the kernel through raw pointers, so their field
        // order and layout must be exactly the C one - hence `repr(C)`.
        #[repr(C)]
        struct Header {
            version: u32,
            pid: c_int,
        }

        #[repr(C)]
        struct Data {
            effective: u32,
            permitted: u32,
            inheritable: u32,
        }

        let header = Header {
            version: CAPABILITY_VERSION_3,
            pid: 0, // equivalent to gettid(),
        };

        let data = [
            // The `as u32` casts intentionally truncate to the low 32 bits.
            Data {
                effective: self.effective as u32,
                permitted: self.permitted as u32,
                inheritable: self.inheritable as u32,
            },
            Data {
                effective: (self.effective >> 32) as u32,
                permitted: (self.permitted >> 32) as u32,
                inheritable: (self.inheritable >> 32) as u32,
            },
        ];

        // `syscall()` is variadic, so hand over explicit raw pointers instead of references.
        libc_try!(unsafe {
            libc::syscall(
                libc::SYS_capset,
                &header as *const Header,
                data.as_ptr(),
            )
        });

        Ok(())
    }

    /// Change the thread's keep-capabilities flag.
    ///
    /// # Errors
    ///
    /// A failing `prctl` call is reported through `libc_try!`.
    pub fn set_keep_caps(on: bool) -> io::Result<()> {
        // `prctl()` is variadic and its arguments are `unsigned long`; passing a plain `c_int`
        // would leave the upper half of the argument undefined on 64 bit targets.
        libc_try!(unsafe { libc::prctl(libc::PR_SET_KEEPCAPS, libc::c_ulong::from(on)) });
        Ok(())
    }
}
/// Helper to enter a process' permission-check environment.
///
/// When we execute a syscall on behalf of another process, we should try to trigger as many
/// permission checks as we can. It is impractical to implement them all manually, so the best
/// thing to do is cause as many of them to happen on the kernel-side as we can.
///
/// We start by cloning the process' capability set. This is because the process may have dropped
/// capabilities which under normal conditions would prevent them from executing the syscall.
/// For example a process may be executing `mknod()` after having dropped `CAP_MKNOD`.
///
/// We then switch over our effective and file system uid and gid. This has 2 reasons: First, it
/// means we do not need to run `chown()` on files we create, secondly, the user may have dropped
/// `CAP_DAC_OVERRIDE` / `CAP_DAC_READ_SEARCH` which may have prevented the creation of the file in
/// the first place (for example, the container program may be a non-root executable with
/// `cap_mknod=ep` as file-capabilities, in which case we do not want a user to be allowed to run
/// `mknod()` on a path owned by a different user). (And checking file system permissions would
/// require us to handle ACLs, quotas, which are all file system type dependent as well, so better
/// leave all that up to the kernel, too!)
///
/// Finally, we also need to make sure we are in the same `devices` cgroup, because it too should
/// affect calls such as `mknod()`.
#[derive(Clone)]
#[must_use = "not using UserCaps may be a security issue"]
pub struct UserCaps {
/// Effective user id of the process; applied with `seteuid()`.
euid: libc::uid_t,
/// Effective group id of the process; applied with `setegid()`.
egid: libc::gid_t,
/// File system user id of the process; applied with `setfsuid()`.
fsuid: libc::uid_t,
/// File system group id of the process; applied with `setfsgid()`.
fsgid: libc::gid_t,
/// Capability sets of the process; applied with `Capabilities::capset()`.
capabilities: Capabilities,
/// File mode creation mask of the process; applied with `umask()`.
umask: libc::mode_t,
/// Path of the process' cgroup in the v1 `devices` hierarchy, if it has one.
cgroup_v1_devices: Option<OsString>,
/// Path of the process' cgroup in the v2 hierarchy, if it has one.
cgroup_v2: Option<OsString>,
}
impl UserCaps {
pub fn new(pidfd: &PidFd) -> Result<UserCaps, Error> {
let status = pidfd.get_status()?;
let cgroups = pidfd.get_cgroups()?;
Ok(UserCaps {
euid: status.uids.euid,
egid: status.uids.egid,
fsuid: status.uids.fsuid,
fsgid: status.uids.fsgid,
capabilities: status.capabilities,
umask: status.umask,
cgroup_v1_devices: cgroups.get("devices").map(|s| s.to_owned()),
cgroup_v2: cgroups.v2().map(|s| s.to_owned()),
})
}
pub fn apply_cgroups(&self) -> io::Result<()> {
fn enter_cgroup(kind: &str, name: &OsStr) -> io::Result<()> {
let mut path = OsString::with_capacity(15 + kind.len() + name.len() + 13 + 1);
path.push(OsStr::from_bytes(b"/sys/fs/cgroup/"));
path.push(kind);
path.push(name);
path.push(OsStr::from_bytes(b"/cgroup.procs"));
std::fs::write(path, b"0")
}
if let Some(ref cg) = self.cgroup_v1_devices {
enter_cgroup("devices/", cg)?;
}
if let Some(ref cg) = self.cgroup_v2 {
enter_cgroup("unified/", cg)?;
}
Ok(())
}
pub fn apply_user_caps(self) -> io::Result<()> {
unsafe {
libc::umask(self.umask);
}
Capabilities::set_keep_caps(true)?;
libc_try!(unsafe { libc::setegid(self.egid) });
libc_try!(unsafe { libc::setfsgid(self.fsgid) });
libc_try!(unsafe { libc::seteuid(self.euid) });
libc_try!(unsafe { libc::setfsuid(self.fsuid) });
self.capabilities.capset()?;
Ok(())
}
}

View File

@ -1,15 +1,15 @@
use std::ffi::CString;
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::os::unix::io::AsRawFd;
use failure::Error;
use nix::sys::stat;
use crate::fork::forking_syscall;
use crate::{libc_try, sc_libc_try};
use crate::lxcseccomp::ProxyMessageBuffer;
use crate::pidfd::PidFd;
use crate::syscall::SyscallStatus;
use crate::tools::Fd;
use crate::{libc_try, sc_libc_try};
pub async fn mknod(msg: &ProxyMessageBuffer) -> Result<SyscallStatus, Error> {
let mode = msg.arg_mode_t(1)?;
@ -21,8 +21,7 @@ pub async fn mknod(msg: &ProxyMessageBuffer) -> Result<SyscallStatus, Error> {
let pathname = msg.arg_c_string(0)?;
let cwd = msg.pid_fd().fd_cwd()?;
let pidfd = unsafe { PidFd::from_raw_fd(msg.pid_fd().as_raw_fd()) };
do_mknodat(pidfd, cwd, pathname, mode, dev).await
do_mknodat(msg.pid_fd(), cwd, pathname, mode, dev).await
}
pub async fn mknodat(msg: &ProxyMessageBuffer) -> Result<SyscallStatus, Error> {
@ -35,8 +34,7 @@ pub async fn mknodat(msg: &ProxyMessageBuffer) -> Result<SyscallStatus, Error> {
let dirfd = msg.arg_fd(0, libc::O_DIRECTORY)?;
let pathname = msg.arg_c_string(1)?;
let pidfd = unsafe { PidFd::from_raw_fd(msg.pid_fd().as_raw_fd()) };
do_mknodat(pidfd, dirfd, pathname, mode, dev).await
do_mknodat(msg.pid_fd(), dirfd, pathname, mode, dev).await
}
fn check_mknod_dev(mode: stat::mode_t, dev: stat::dev_t) -> bool {
@ -51,29 +49,24 @@ fn check_mknod_dev(mode: stat::mode_t, dev: stat::dev_t) -> bool {
}
async fn do_mknodat(
pidfd: PidFd,
pidfd: &PidFd,
dirfd: Fd,
pathname: CString,
mode: stat::mode_t,
dev: stat::dev_t,
) -> Result<SyscallStatus, Error> {
let (uid, gid) = pidfd.get_euid_egid()?;
let caps = pidfd.user_caps()?;
// FIXME: !!! ALSO COPY THE PROCESS' CAPABILITY SET AND USE KEEP_CAPS!
Ok(forking_syscall(move || {
caps.apply_cgroups()?;
pidfd.mount_namespace()?.setns()?;
pidfd.chroot()?;
libc_try!(unsafe { libc::fchdir(dirfd.as_raw_fd()) });
libc_try!(unsafe { libc::setegid(gid) });
libc_try!(unsafe { libc::seteuid(uid) });
let out = sc_libc_try!(unsafe {
libc::mknodat(
dirfd.as_raw_fd(),
pathname.as_ptr(),
mode,
dev,
)
});
caps.apply_user_caps()?;
let out =
sc_libc_try!(unsafe { libc::mknodat(dirfd.as_raw_fd(), pathname.as_ptr(), mode, dev) });
Ok(SyscallStatus::Ok(out.into()))
})
.await?)

View File

@ -32,7 +32,9 @@ macro_rules! sc_libc_try {
($expr:expr) => {{
let res = $expr;
if res == -1 {
return Ok($crate::syscall::SyscallStatus::Err(::nix::errno::errno() as _))
return Ok($crate::syscall::SyscallStatus::Err(
::nix::errno::errno() as _
));
} else {
res
}

View File

@ -21,6 +21,12 @@ macro_rules! file_descriptor_type {
pub struct $type(RawFd);
crate::file_descriptor_impl!($type);
impl FromRawFd for $type {
unsafe fn from_raw_fd(fd: RawFd) -> Self {
Self(fd)
}
}
};
}
@ -50,12 +56,6 @@ macro_rules! file_descriptor_impl {
fd
}
}
impl FromRawFd for $type {
unsafe fn from_raw_fd(fd: RawFd) -> Self {
Self(fd)
}
}
};
}
@ -67,6 +67,12 @@ pub struct Fd(pub RawFd);
file_descriptor_impl!(Fd);
impl FromRawFd for Fd {
unsafe fn from_raw_fd(fd: RawFd) -> Self {
Self(fd)
}
}
impl mio::Evented for Fd {
fn register(
&self,
@ -111,7 +117,7 @@ impl AsyncFd {
let registration = tokio::reactor::Registration::new();
if !registration.register(&fd)? {
return Err(io::Error::new(
io::ErrorKind::Other,
io::ErrorKind::Other,
"duplicate file descriptor registration?",
));
}
@ -301,3 +307,13 @@ macro_rules! libc_try {
}
}};
}
/// Conversion from an owned `Fd` into another file descriptor backed type.
pub trait FromFd {
    /// Consumes `fd` and builds `Self` from its raw descriptor.
    fn from_fd(fd: Fd) -> Self;
}

/// Every type constructible from a raw file descriptor can also be built from an `Fd`.
impl<T> FromFd for T
where
    T: FromRawFd,
{
    fn from_fd(fd: Fd) -> Self {
        // NOTE(review): presumably `into_raw_fd()` gives up ownership of the descriptor, so the
        // new value becomes its only owner - confirm against `file_descriptor_impl!`.
        let raw = fd.into_raw_fd();
        unsafe { T::from_raw_fd(raw) }
    }
}