mirror of
https://github.com/systemd/systemd.git
synced 2024-12-23 21:35:11 +03:00
journal: when sending huge log messages prefer memfds over temporary files in /dev/shm
Previously, when a log message grew beyond the maximum AF_UNIX/SOCK_DGRAM datagram limit, we'd send an fd to a deleted file in /dev/shm instead. Because the sender could still modify the file after delivery, we had to immediately copy the data on the receiving side. With memfds we can optimize this logic and also remove the dependency on /dev/shm: simply send a sealed memfd around, and if we detect the seal, memory-map the fd and use it directly.
This commit is contained in:
parent
dd4540da0e
commit
c79e98eadd
@ -198,7 +198,7 @@ finish:
|
||||
|
||||
_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
|
||||
PROTECT_ERRNO;
|
||||
int fd;
|
||||
int fd, r;
|
||||
_cleanup_close_ int buffer_fd = -1;
|
||||
struct iovec *w;
|
||||
uint64_t *l;
|
||||
@ -218,6 +218,7 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
|
||||
} control;
|
||||
struct cmsghdr *cmsg;
|
||||
bool have_syslog_identifier = false;
|
||||
bool seal = true;
|
||||
|
||||
assert_return(iov, -EINVAL);
|
||||
assert_return(n > 0, -EINVAL);
|
||||
@ -304,21 +305,36 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
|
||||
if (errno != EMSGSIZE && errno != ENOBUFS)
|
||||
return -errno;
|
||||
|
||||
/* Message doesn't fit... Let's dump the data in a temporary
|
||||
* file and just pass a file descriptor of it to the other
|
||||
* side.
|
||||
/* Message doesn't fit... Let's dump the data in a memfd or
|
||||
* temporary file and just pass a file descriptor of it to the
|
||||
* other side.
|
||||
*
|
||||
* We use /dev/shm instead of /tmp here, since we want this to
|
||||
* be a tmpfs, and one that is available from early boot on
|
||||
* and where unprivileged users can create files. */
|
||||
buffer_fd = open_tmpfile("/dev/shm", O_RDWR | O_CLOEXEC);
|
||||
if (buffer_fd < 0)
|
||||
return buffer_fd;
|
||||
* For the temporary files we use /dev/shm instead of /tmp
|
||||
* here, since we want this to be a tmpfs, and one that is
|
||||
* available from early boot on and where unprivileged users
|
||||
* can create files. */
|
||||
buffer_fd = memfd_create("journal-message", MFD_ALLOW_SEALING | MFD_CLOEXEC);
|
||||
if (buffer_fd < 0) {
|
||||
if (errno == ENOSYS) {
|
||||
buffer_fd = open_tmpfile("/dev/shm", O_RDWR | O_CLOEXEC);
|
||||
if (buffer_fd < 0)
|
||||
return buffer_fd;
|
||||
|
||||
seal = false;
|
||||
} else
|
||||
return -errno;
|
||||
}
|
||||
|
||||
n = writev(buffer_fd, w, j);
|
||||
if (n < 0)
|
||||
return -errno;
|
||||
|
||||
if (seal) {
|
||||
r = fcntl(buffer_fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
|
||||
if (r < 0)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
mh.msg_iov = NULL;
|
||||
mh.msg_iovlen = 0;
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include "socket-util.h"
|
||||
#include "path-util.h"
|
||||
@ -306,17 +307,29 @@ void server_process_native_file(
|
||||
const char *label, size_t label_len) {
|
||||
|
||||
struct stat st;
|
||||
_cleanup_free_ void *p = NULL;
|
||||
ssize_t n;
|
||||
bool sealed;
|
||||
int r;
|
||||
|
||||
/* Data is in the passed fd, since it didn't fit in a
|
||||
* datagram. */
|
||||
|
||||
assert(s);
|
||||
assert(fd >= 0);
|
||||
|
||||
if (!ucred || ucred->uid != 0) {
|
||||
/* If it's a memfd, check if it is sealed. If so, we can just
|
||||
* map it and use it, and do not need to copy the data
|
||||
* out. */
|
||||
r = fcntl(fd, F_GET_SEALS);
|
||||
sealed = r >= 0 &&
|
||||
(r & (F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL)) == (F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL);
|
||||
|
||||
if (!sealed && (!ucred || ucred->uid != 0)) {
|
||||
_cleanup_free_ char *sl = NULL, *k = NULL;
|
||||
const char *e;
|
||||
|
||||
/* If this is not a sealed memfd, and the peer is unknown or
|
||||
* unprivileged, then verify the path. */
|
||||
|
||||
if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
|
||||
log_oom();
|
||||
return;
|
||||
@ -344,11 +357,6 @@ void server_process_native_file(
|
||||
}
|
||||
}
|
||||
|
||||
/* Data is in the passed file, since it didn't fit in a
|
||||
* datagram. We can't map the file here, since clients might
|
||||
* then truncate it and trigger a SIGBUS for us. So let's
|
||||
* stupidly read it */
|
||||
|
||||
if (fstat(fd, &st) < 0) {
|
||||
log_error("Failed to stat passed file, ignoring: %m");
|
||||
return;
|
||||
@ -367,17 +375,41 @@ void server_process_native_file(
|
||||
return;
|
||||
}
|
||||
|
||||
p = malloc(st.st_size);
|
||||
if (!p) {
|
||||
log_oom();
|
||||
return;
|
||||
}
|
||||
if (sealed) {
|
||||
void *p;
|
||||
size_t ps;
|
||||
|
||||
n = pread(fd, p, st.st_size, 0);
|
||||
if (n < 0)
|
||||
log_error("Failed to read file, ignoring: %s", strerror(-n));
|
||||
else if (n > 0)
|
||||
server_process_native_message(s, p, n, ucred, tv, label, label_len);
|
||||
/* The file is sealed, we can just map it and use it. */
|
||||
|
||||
ps = PAGE_ALIGN(st.st_size);
|
||||
p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (p == MAP_FAILED) {
|
||||
log_error("Failed to map memfd, ignoring: %m");
|
||||
return;
|
||||
}
|
||||
|
||||
server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
|
||||
assert_se(munmap(p, ps) >= 0);
|
||||
} else {
|
||||
_cleanup_free_ void *p = NULL;
|
||||
ssize_t n;
|
||||
|
||||
/* The file is not sealed, we can't map the file here, since
|
||||
* clients might then truncate it and trigger a SIGBUS for
|
||||
* us. So let's stupidly read it */
|
||||
|
||||
p = malloc(st.st_size);
|
||||
if (!p) {
|
||||
log_oom();
|
||||
return;
|
||||
}
|
||||
|
||||
n = pread(fd, p, st.st_size, 0);
|
||||
if (n < 0)
|
||||
log_error("Failed to read file, ignoring: %s", strerror(-n));
|
||||
else if (n > 0)
|
||||
server_process_native_message(s, p, n, ucred, tv, label, label_len);
|
||||
}
|
||||
}
|
||||
|
||||
int server_open_native_socket(Server*s) {
|
||||
|
Loading…
Reference in New Issue
Block a user