switch to vectored I/O

Signed-off-by: Wolfgang Bumiller <w.bumiller@errno.eu>
This commit is contained in:
Wolfgang Bumiller
2019-07-07 13:19:16 +02:00
parent 52f50bd443
commit 571dbe0396
5 changed files with 194 additions and 128 deletions

View File

@ -7,6 +7,7 @@ use failure::{bail, Error};
use libc::pid_t;
use super::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes};
use super::tools::{IoVec, IoVecMut};
/// Seccomp notification proxy message sent by the lxc monitor.
///
@ -46,53 +47,82 @@ pub struct SeccompNotifyProxyMsg {
/// Helper to receive and verify proxy notification messages.
#[repr(C)]
pub struct ProxyMessageBuffer {
buffer: Vec<u8>,
proxy_msg: SeccompNotifyProxyMsg,
seccomp_notif: SeccompNotif,
seccomp_resp: SeccompNotifResp,
cookie_buf: Vec<u8>,
sizes: SeccompNotifSizes,
seccomp_packet_size: usize,
}
unsafe fn io_vec_mut<T>(value: &mut T) -> IoVecMut {
IoVecMut::new(std::slice::from_raw_parts_mut(
value as *mut T as *mut u8,
mem::size_of::<T>(),
))
}
unsafe fn io_vec<T>(value: &T) -> IoVec {
IoVec::new(std::slice::from_raw_parts(
value as *const T as *const u8,
mem::size_of::<T>(),
))
}
impl ProxyMessageBuffer {
/// Allocate a new proxy message buffer with a specific maximum cookie size.
pub fn new(max_cookie: usize) -> io::Result<Self> {
let sizes = SeccompNotifSizes::get()?;
let max_size = sizes.notif as usize + sizes.notif_resp as usize + max_cookie;
let sizes = SeccompNotifSizes::get_checked()?;
let seccomp_packet_size = mem::size_of::<SeccompNotifyProxyMsg>()
+ sizes.notif as usize
+ sizes.notif_resp as usize;
Ok(Self {
buffer: unsafe { super::tools::vec::uninitialized(max_size) },
proxy_msg: unsafe { mem::zeroed() },
seccomp_notif: unsafe { mem::zeroed() },
seccomp_resp: unsafe { mem::zeroed() },
cookie_buf: unsafe { super::tools::vec::uninitialized(max_cookie) },
sizes,
seccomp_packet_size,
})
}
/// Allow this buffer to be filled with new data.
/// Resets the buffer capacity and returns an IoVecMut used to fill the buffer.
///
/// This resets the buffer's length to its full capacity and returns a mutable slice.
///
/// After this you must call `set_len()` with the number of bytes written to the buffer to
/// verify the new contents.
pub unsafe fn new_mut(&mut self) -> &mut [u8] {
self.buffer.set_len(self.buffer.capacity());
&mut self.buffer[..]
}
/// This vector covers the cookie buffer, but unless `set_len` is used afterwards with the real
/// size read into the slice, the cookie will appear empty.
pub fn io_vec_mut(&mut self) -> [IoVecMut; 4] {
self.proxy_msg.cookie_len = 0;
fn drop_cookie(&mut self) {
self.msg_mut().cookie_len = 0;
unsafe {
self.buffer.set_len(self.seccomp_packet_size);
self.cookie_buf.set_len(self.cookie_buf.capacity());
}
let out = [
unsafe { io_vec_mut(&mut self.proxy_msg) },
unsafe { io_vec_mut(&mut self.seccomp_notif) },
unsafe { io_vec_mut(&mut self.seccomp_resp) },
IoVecMut::new(self.cookie_buf.as_mut_slice()),
];
unsafe {
self.cookie_buf.set_len(0);
}
out
}
/// Prepare to send a reply.
///
/// This drops the cookie and returns a byte slice of the proxy message struct suitable to be
/// sent as a response to the lxc monitor.
///
/// The cookie will be inaccessible after this.
pub fn as_buf_no_cookie(&mut self) -> &[u8] {
self.drop_cookie();
&self.buffer[..]
/// Returns an io slice covering only the data expected by liblxc. The cookie will be excluded.
pub fn io_vec_no_cookie(&mut self) -> [IoVec; 3] {
[
unsafe { io_vec(&self.proxy_msg) },
unsafe { io_vec(&self.seccomp_notif) },
unsafe { io_vec(&self.seccomp_resp) },
]
}
#[inline]
@ -105,30 +135,44 @@ impl ProxyMessageBuffer {
resp.flags = 0;
}
/// You must call this after writing a new packet to via `new_mut()`. This verifies that there's
/// enough data available.
///
/// If this returns false, you must not attempt to access the data!
/// Called by with_io_slice after the callback returned the new size. This verifies that
/// there's enough data available.
pub fn set_len(&mut self, len: usize) -> Result<(), Error> {
if len > self.buffer.capacity() {
bail!("seccomp proxy message longer than buffer capacity");
if len < self.seccomp_packet_size {
bail!("seccomp proxy message too short");
}
if !self.validate() {
if self.proxy_msg.reserved0 != 0 {
bail!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch");
}
if !self.check_sizes() {
bail!("seccomp proxy message content size validation failed");
}
if len != self.seccomp_packet_size + self.cookie_len() {
if len - self.seccomp_packet_size > self.cookie_buf.capacity() {
bail!("seccomp proxy message too long");
}
let cookie_len = match usize::try_from(self.proxy_msg.cookie_len) {
Ok(cl) => cl,
Err(_) => {
self.proxy_msg.cookie_len = 0;
bail!("cookie length exceeds our size type!");
}
};
if len != self.seccomp_packet_size + cookie_len {
bail!(
"seccomp proxy packet contains unexpected cookie length {} + {} != {}",
self.seccomp_packet_size,
self.cookie_len(),
cookie_len,
len
);
}
unsafe {
self.buffer.set_len(len);
self.cookie_buf.set_len(cookie_len);
}
self.prepare_response();
@ -136,93 +180,44 @@ impl ProxyMessageBuffer {
Ok(())
}
fn validate(&self) -> bool {
if self.reserved0() != 0 {
return false;
}
let got = self.msg().sizes.clone();
fn check_sizes(&self) -> bool {
let got = self.proxy_msg.sizes.clone();
got.notif == self.sizes.notif
&& got.notif_resp == self.sizes.notif_resp
&& got.data == self.sizes.data
}
#[inline]
fn msg_ptr(&self) -> *const SeccompNotifyProxyMsg {
self.buffer.as_ptr() as *const SeccompNotifyProxyMsg
}
#[inline]
fn msg(&self) -> &SeccompNotifyProxyMsg {
unsafe { &*self.msg_ptr() }
}
#[inline]
fn msg_mut_ptr(&mut self) -> *mut SeccompNotifyProxyMsg {
self.buffer.as_mut_ptr() as *mut SeccompNotifyProxyMsg
}
#[inline]
fn msg_mut(&mut self) -> &mut SeccompNotifyProxyMsg {
unsafe { &mut *self.msg_mut_ptr() }
}
fn reserved0(&self) -> u64 {
self.msg().reserved0
}
/// Get the monitor pid from the current message.
///
/// There's no guarantee that the pid is valid.
pub fn monitor_pid(&self) -> pid_t {
self.msg().monitor_pid
self.proxy_msg.monitor_pid
}
/// Get the container's init pid from the current message.
///
/// There's no guarantee that the pid is valid.
pub fn init_pid(&self) -> pid_t {
self.msg().init_pid
self.proxy_msg.init_pid
}
/// Get the syscall request structure of this message.
pub fn request(&self) -> &SeccompNotif {
unsafe {
&*(self
.buffer
.as_ptr()
.add(mem::size_of::<SeccompNotifyProxyMsg>()) as *const SeccompNotif)
}
&self.seccomp_notif
}
/// Access the response buffer of this message.
pub fn response_mut(&mut self) -> &mut SeccompNotifResp {
unsafe {
&mut *(self
.buffer
.as_mut_ptr()
.add(mem::size_of::<SeccompNotifyProxyMsg>())
.add(usize::from(self.sizes.notif)) as *mut SeccompNotifResp)
}
&mut self.seccomp_resp
}
/// Get the cookie's length.
pub fn cookie_len(&self) -> usize {
usize::try_from(self.msg().cookie_len).expect("cookie size should fit in an usize")
usize::try_from(self.proxy_msg.cookie_len).expect("cookie size should fit in an usize")
}
/// Get the cookie sent along with this message.
pub fn cookie(&self) -> &[u8] {
let len = self.cookie_len();
unsafe {
let start = self
.buffer
.as_ptr()
.add(mem::size_of::<SeccompNotifyProxyMsg>())
.add(usize::from(self.sizes.notif))
.add(usize::from(self.sizes.notif_resp));
std::slice::from_raw_parts(start, len)
}
&self.cookie_buf
}
}

View File

@ -1,5 +1,6 @@
#![feature(async_await)]
use std::ffi::OsString;
use std::io;
use failure::{bail, format_err, Error};
@ -12,9 +13,6 @@ pub mod tools;
use socket::{AsyncSeqPacketSocket, SeqPacketListener};
const SOCKET_DIR: &'static str = "/run/pve";
const SOCKET_PATH: &'static str = "/run/pve/lxc-syscalld.sock";
fn main() {
if let Err(err) = run() {
eprintln!("error: {}", err);
@ -23,27 +21,31 @@ fn main() {
}
fn run() -> Result<(), Error> {
let _ = std::fs::create_dir(SOCKET_DIR);
let socket_path = std::env::args_os()
.skip(1)
.next()
.ok_or_else(|| format_err!("missing parameter: socket path to listen on"))?;
match std::fs::remove_file(SOCKET_PATH) {
match std::fs::remove_file(&socket_path) {
Ok(_) => (),
Err(ref e) if e.kind() == io::ErrorKind::NotFound => (), // Ok
Err(e) => bail!("failed to remove previous socket: {}", e),
}
tokio::run(async_run());
tokio::run(async_run(socket_path));
Ok(())
}
async fn async_run() {
if let Err(err) = async_run_do().await {
async fn async_run(socket_path: OsString) {
if let Err(err) = async_run_do(socket_path).await {
eprintln!("error accepting clients, bailing out: {}", err);
}
}
async fn async_run_do() -> Result<(), Error> {
let address = SockAddr::new_unix(SOCKET_PATH).expect("cannot create struct sockaddr_un?");
async fn async_run_do(socket_path: OsString) -> Result<(), Error> {
let address =
SockAddr::new_unix(socket_path.as_os_str()).expect("cannot create struct sockaddr_un?");
let mut listener = SeqPacketListener::bind(&address)
.map_err(|e| format_err!("failed to create listening socket: {}", e))?;
@ -67,7 +69,11 @@ async fn handle_client_do(mut client: AsyncSeqPacketSocket) -> Result<(), Error>
.map_err(|e| format_err!("failed to allocate proxy message buffer: {}", e))?;
loop {
let (size, _fds) = client.recv_fds(unsafe { msgbuf.new_mut() }, 1).await?;
let (size, _fds) = {
let mut iovec = msgbuf.io_vec_mut();
client.recv_fds_vectored(&mut iovec, 1).await?
};
if size == 0 {
println!("client disconnected");
break;
@ -82,7 +88,8 @@ async fn handle_client_do(mut client: AsyncSeqPacketSocket) -> Result<(), Error>
resp.val = 0;
resp.error = -libc::ENOENT;
client.sendmsg(msgbuf.as_buf_no_cookie()).await?;
let iovec = msgbuf.io_vec_no_cookie();
client.sendmsg_vectored(&iovec).await?;
}
Ok(())

View File

@ -2,8 +2,8 @@
//!
//! Mostly provides data structures.
use std::io;
use std::os::raw::c_int;
use std::{io, mem};
/// Contains syscall data.
#[repr(C)]
@ -73,4 +73,28 @@ impl SeccompNotifSizes {
Err(io::Error::last_os_error())
}
}
/// Check whether the kernel's data structure sizes match the one this
/// crate was compiled with.
pub fn check(&self) -> io::Result<()> {
if usize::from(self.notif) != mem::size_of::<SeccompNotif>()
|| usize::from(self.notif_resp) != mem::size_of::<SeccompNotifResp>()
|| usize::from(self.data) != mem::size_of::<SeccompData>()
{
Err(io::Error::new(
io::ErrorKind::Other,
"seccomp data structure size mismatch",
))
} else {
Ok(())
}
}
/// Query the kernel for its data structure sizes and check whether they
/// match this ones this crate was compiled with.
pub fn get_checked() -> io::Result<Self> {
let this = Self::get()?;
this.check()?;
Ok(this)
}
}

View File

@ -10,7 +10,7 @@ use futures::future::poll_fn;
use futures::ready;
use nix::sys::socket::{AddressFamily, SockAddr, SockFlag, SockType};
use super::tools::{vec, Fd};
use super::tools::{vec, Fd, IoVec, IoVecMut};
pub struct SeqPacketSocket(Fd);
@ -25,7 +25,11 @@ impl SeqPacketSocket {
(self.0).0
}
pub fn recv_fds(&mut self, data: &mut [u8], num_fds: usize) -> io::Result<(usize, Vec<Fd>)> {
pub fn recv_fds_vectored(
&mut self,
iov: &mut [IoVecMut],
num_fds: usize,
) -> io::Result<(usize, Vec<Fd>)> {
let fdlist_size = u32::try_from(mem::size_of::<RawFd>() * num_fds)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("size error: {}", e)))?;
@ -34,13 +38,8 @@ impl SeqPacketSocket {
ptr::write_bytes(cmsgbuf.as_mut_ptr(), 0xff, cmsgbuf.len());
}
let mut iov = [libc::iovec {
iov_base: data.as_mut_ptr() as *mut c_void,
iov_len: data.len(),
}];
let mut msg: libc::msghdr = unsafe { mem::zeroed() };
msg.msg_iov = iov.as_mut_ptr() as *mut libc::iovec;
msg.msg_iov = iov.as_mut_ptr() as *mut _ as *mut libc::iovec;
msg.msg_iovlen = iov.len();
msg.msg_controllen = cmsgbuf.len();
msg.msg_control = cmsgbuf.as_mut_ptr() as *mut c_void;
@ -78,14 +77,9 @@ impl SeqPacketSocket {
///
/// Note that short writes are silently treated as success, since this is a `SOCK_SEQPACKET`,
/// so neither continuing nor repeating a partial messages makes all that much sense.
pub fn sendmsg(&mut self, data: &[u8]) -> io::Result<()> {
let mut iov = [libc::iovec {
iov_base: data.as_ptr() as *const c_void as *mut c_void,
iov_len: data.len(),
}];
pub fn sendmsg_vectored(&mut self, iov: &[IoVec]) -> io::Result<()> {
let mut msg: libc::msghdr = unsafe { mem::zeroed() };
msg.msg_iov = iov.as_mut_ptr() as *mut libc::iovec;
msg.msg_iov = iov.as_ptr() as *const libc::iovec as *mut libc::iovec;
msg.msg_iovlen = iov.len();
let sent = unsafe { libc::sendmsg(self.fd(), &mut msg, libc::MSG_NOSIGNAL) };
@ -211,14 +205,14 @@ impl AsyncSeqPacketSocket {
})
}
pub fn poll_recv_fds(
pub fn poll_recv_fds_vectored(
&mut self,
data: &mut [u8],
iov: &mut [IoVecMut],
num_fds: usize,
cx: &mut Context,
) -> Poll<io::Result<(usize, Vec<Fd>)>> {
loop {
match self.socket.recv_fds(data, num_fds) {
match self.socket.recv_fds_vectored(iov, num_fds) {
Ok(res) => break Poll::Ready(Ok(res)),
Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => {
match ready!(self.registration.poll_read_ready(cx)) {
@ -231,17 +225,21 @@ impl AsyncSeqPacketSocket {
}
}
pub async fn recv_fds(
pub async fn recv_fds_vectored(
&mut self,
data: &mut [u8],
iov: &mut [IoVecMut<'_>],
num_fds: usize,
) -> io::Result<(usize, Vec<Fd>)> {
poll_fn(move |cx| self.poll_recv_fds(data, num_fds, cx)).await
poll_fn(move |cx| self.poll_recv_fds_vectored(iov, num_fds, cx)).await
}
pub fn poll_sendmsg(&mut self, data: &[u8], cx: &mut Context) -> Poll<io::Result<()>> {
pub fn poll_sendmsg_vectored(
&mut self,
data: &[IoVec],
cx: &mut Context,
) -> Poll<io::Result<()>> {
loop {
match self.socket.sendmsg(data) {
match self.socket.sendmsg_vectored(data) {
Ok(res) => break Poll::Ready(Ok(res)),
Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => {
match ready!(self.registration.poll_write_ready(cx)) {
@ -254,7 +252,7 @@ impl AsyncSeqPacketSocket {
}
}
pub async fn sendmsg(&mut self, data: &[u8]) -> io::Result<()> {
poll_fn(move |cx| self.poll_sendmsg(data, cx)).await
pub async fn sendmsg_vectored(&mut self, data: &[IoVec<'_>]) -> io::Result<()> {
poll_fn(move |cx| self.poll_sendmsg_vectored(data, cx)).await
}
}

View File

@ -4,6 +4,7 @@
//! crate as that's where we have all this stuff usually...
use std::io;
use std::marker::PhantomData;
use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
use mio::unix::EventedFd;
@ -90,3 +91,44 @@ pub mod vec {
out
}
}
/// The standard IoSlice does not implement Send and Sync. These types do.
pub struct IoVec<'a> {
_iov: libc::iovec,
_phantom: PhantomData<&'a [u8]>,
}
unsafe impl Send for IoVec<'_> {}
unsafe impl Sync for IoVec<'_> {}
impl IoVec<'_> {
pub fn new(slice: &[u8]) -> Self {
Self {
_iov: libc::iovec {
iov_base: slice.as_ptr() as *mut libc::c_void,
iov_len: slice.len(),
},
_phantom: PhantomData,
}
}
}
pub struct IoVecMut<'a> {
_iov: libc::iovec,
_phantom: PhantomData<&'a [u8]>,
}
unsafe impl Send for IoVecMut<'_> {}
unsafe impl Sync for IoVecMut<'_> {}
impl IoVecMut<'_> {
pub fn new(slice: &mut [u8]) -> Self {
Self {
_iov: libc::iovec {
iov_base: slice.as_mut_ptr() as *mut libc::c_void,
iov_len: slice.len(),
},
_phantom: PhantomData,
}
}
}