mirror of
https://github.com/systemd/systemd.git
synced 2024-12-22 17:35:35 +03:00
vmspawn: added vsock functionality
also includes cosmetic fixups from: https://github.com/systemd/systemd/pull/29874
This commit is contained in:
parent
8930285234
commit
f72a085641
@ -1,9 +1,9 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/vhost.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "architecture.h"
|
||||
#include "conf-files.h"
|
||||
#include "errno-util.h"
|
||||
@ -15,7 +15,10 @@
|
||||
#include "memory-util.h"
|
||||
#include "path-lookup.h"
|
||||
#include "path-util.h"
|
||||
#include "random-util.h"
|
||||
#include "recurse-dir.h"
|
||||
#include "siphash24.h"
|
||||
#include "socket-util.h"
|
||||
#include "sort-util.h"
|
||||
#include "string-util.h"
|
||||
#include "strv.h"
|
||||
@ -45,6 +48,32 @@ int qemu_check_kvm_support(void) {
|
||||
return -errno;
|
||||
}
|
||||
|
||||
int qemu_check_vsock_support(void) {
|
||||
_cleanup_close_ int fd = -EBADF;
|
||||
/* Just using access() will just check if the device node exists, but not whether a
|
||||
* device driver is behind it (this is a common case since systemd-tmpfiles creates
|
||||
* the device node on boot, typically).
|
||||
*
|
||||
* Hence we open() the path to see if there's actually something behind.
|
||||
*
|
||||
* If not this should return ENODEV.
|
||||
*/
|
||||
|
||||
fd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
|
||||
if (fd >= 0)
|
||||
return true;
|
||||
if (errno == ENODEV) {
|
||||
log_debug_errno(errno, "/dev/vhost-vsock device doesn't exist. Not adding a vsock device to the virtual machine.");
|
||||
return false;
|
||||
}
|
||||
if (errno == EPERM) {
|
||||
log_debug_errno(errno, "Permission denied to access /dev/vhost-vsock. Not adding a vsock device to the virtual machine.");
|
||||
return false;
|
||||
}
|
||||
|
||||
return -errno;
|
||||
}
|
||||
|
||||
/* holds the data retrieved from the QEMU firmware interop JSON data */
|
||||
typedef struct FirmwareData {
|
||||
char **features;
|
||||
@ -237,3 +266,79 @@ int find_qemu_binary(char **ret_qemu_binary) {
|
||||
|
||||
return find_executable(qemu_arch_specific, ret_qemu_binary);
|
||||
}
|
||||
|
||||
int vsock_fix_child_cid(unsigned *machine_cid, const char *machine, int *ret_child_sock) {
|
||||
/* this is an arbitrary value picked from /dev/urandom */
|
||||
static const uint8_t sip_key[HASH_KEY_SIZE] = {
|
||||
0x03, 0xad, 0xf0, 0xa4,
|
||||
0x59, 0x2c, 0x77, 0x11,
|
||||
0xda, 0x39, 0x0c, 0xba,
|
||||
0xf5, 0x4c, 0x80, 0x52
|
||||
};
|
||||
struct siphash machine_hash_state, state;
|
||||
_cleanup_close_ int vfd = -EBADF;
|
||||
int r;
|
||||
|
||||
/* uint64_t is required here for the ioctl call, but valid CIDs are only 32 bits */
|
||||
uint64_t cid = *ASSERT_PTR(machine_cid);
|
||||
|
||||
assert(machine);
|
||||
assert(ret_child_sock);
|
||||
|
||||
/* Fix the CID of the AF_VSOCK socket passed to qemu
|
||||
*
|
||||
* If the user has passed us a CID (machine_cid != VMADDR_CID_ANY), then attempt to bind to that CID
|
||||
* and error if we cannot.
|
||||
*
|
||||
* Otherwise hash the machine name to get a random CID and attempt to bind to that.
|
||||
* If it is occupied add more information into the hash and try again.
|
||||
* If after 64 attempts this hasn't worked fallback to truly random CIDs.
|
||||
* If after another 64 attempts this hasn't worked then give up and return EADDRNOTAVAIL.
|
||||
*/
|
||||
|
||||
/* remove O_CLOEXEC before this fd is passed to QEMU */
|
||||
vfd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
|
||||
if (vfd < 0)
|
||||
return log_debug_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
|
||||
|
||||
if (cid != VMADDR_CID_ANY) {
|
||||
r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
|
||||
if (r < 0)
|
||||
return log_debug_errno(errno, "Failed to set CID for child vsock with user provided CID %" PRIu64 ": %m", cid);
|
||||
*ret_child_sock = TAKE_FD(vfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
siphash24_init(&machine_hash_state, sip_key);
|
||||
siphash24_compress_string(machine, &machine_hash_state);
|
||||
for (unsigned i = 0; i < 64; i++) {
|
||||
state = machine_hash_state;
|
||||
siphash24_compress_safe(&i, sizeof i, &state);
|
||||
uint64_t hash = siphash24_finalize(&state);
|
||||
|
||||
cid = 3 + (hash % (UINT_MAX - 4));
|
||||
r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
|
||||
if (r >= 0) {
|
||||
*machine_cid = cid;
|
||||
*ret_child_sock = TAKE_FD(vfd);
|
||||
return 0;
|
||||
}
|
||||
if (errno != EADDRINUSE)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 64; i++) {
|
||||
cid = 3 + random_u64_range(UINT_MAX - 4);
|
||||
r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
|
||||
if (r >= 0) {
|
||||
*machine_cid = cid;
|
||||
*ret_child_sock = TAKE_FD(vfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (errno != EADDRINUSE)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Failed to assign a CID to the guest vsock");
|
||||
}
|
||||
|
@ -20,5 +20,7 @@ OvmfConfig* ovmf_config_free(OvmfConfig *ovmf_config);
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(OvmfConfig*, ovmf_config_free);
|
||||
|
||||
int qemu_check_kvm_support(void);
|
||||
int qemu_check_vsock_support(void);
|
||||
int find_ovmf_config(int search_sb, OvmfConfig **ret_ovmf_config);
|
||||
int find_qemu_binary(char **ret_qemu_binary);
|
||||
int vsock_fix_child_cid(unsigned *machine_cid, const char *machine, int *ret_child_sock);
|
||||
|
@ -9,12 +9,15 @@
|
||||
#include "alloc-util.h"
|
||||
#include "architecture.h"
|
||||
#include "build.h"
|
||||
#include "common-signal.h"
|
||||
#include "copy.h"
|
||||
#include "creds-util.h"
|
||||
#include "escape.h"
|
||||
#include "fileio.h"
|
||||
#include "format-util.h"
|
||||
#include "fs-util.h"
|
||||
#include "hexdecoct.h"
|
||||
#include "hostname-util.h"
|
||||
#include "log.h"
|
||||
#include "machine-credential.h"
|
||||
#include "main-func.h"
|
||||
@ -24,6 +27,9 @@
|
||||
#include "path-util.h"
|
||||
#include "pretty-print.h"
|
||||
#include "process-util.h"
|
||||
#include "sd-event.h"
|
||||
#include "signal-util.h"
|
||||
#include "socket-util.h"
|
||||
#include "strv.h"
|
||||
#include "tmpfile-util.h"
|
||||
#include "vmspawn-settings.h"
|
||||
@ -31,9 +37,12 @@
|
||||
|
||||
static PagerFlags arg_pager_flags = 0;
|
||||
static char *arg_image = NULL;
|
||||
static char *arg_machine = NULL;
|
||||
static char *arg_qemu_smp = NULL;
|
||||
static uint64_t arg_qemu_mem = 2ULL * 1024ULL * 1024ULL * 1024ULL;
|
||||
static int arg_qemu_kvm = -1;
|
||||
static int arg_qemu_vsock = -1;
|
||||
static uint64_t arg_vsock_cid = UINT64_MAX;
|
||||
static bool arg_qemu_gui = false;
|
||||
static int arg_secure_boot = -1;
|
||||
static MachineCredential *arg_credentials = NULL;
|
||||
@ -42,6 +51,7 @@ static SettingsMask arg_settings_mask = 0;
|
||||
static char **arg_parameters = NULL;
|
||||
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep);
|
||||
STATIC_DESTRUCTOR_REGISTER(arg_parameters, strv_freep);
|
||||
|
||||
@ -66,9 +76,14 @@ static int help(void) {
|
||||
"%3$sHost Configuration:%4$s\n"
|
||||
" --qemu-smp=SMP Configure guest's SMP settings\n"
|
||||
" --qemu-mem=MEM Configure guest's RAM size\n"
|
||||
" --qemu-kvm= Configure whether to use KVM or not\n"
|
||||
" --qemu-kvm=BOOL Configure whether to use KVM or not\n"
|
||||
" --qemu-vsock=BOOL Configure whether to use qemu with a vsock or not\n"
|
||||
" --vsock-cid= Specify the CID to use for the qemu guest's vsock\n"
|
||||
" --qemu-gui Start QEMU in graphical mode\n"
|
||||
" --secure-boot= Configure searching for firmware with SB support\n\n"
|
||||
" --secure-boot=BOOL Configure whether to search for firmware which\n"
|
||||
" supports Secure Boot\n\n"
|
||||
"%3$sSystem Identity:%4$s\n"
|
||||
" -M --machine=NAME Set the machine name for the container\n"
|
||||
"%3$sCredentials:%4$s\n"
|
||||
" --set-credential=ID:VALUE\n"
|
||||
" Pass a credential with literal value to container.\n"
|
||||
@ -93,6 +108,8 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
ARG_QEMU_SMP,
|
||||
ARG_QEMU_MEM,
|
||||
ARG_QEMU_KVM,
|
||||
ARG_QEMU_VSOCK,
|
||||
ARG_VSOCK_CID,
|
||||
ARG_QEMU_GUI,
|
||||
ARG_SECURE_BOOT,
|
||||
ARG_SET_CREDENTIAL,
|
||||
@ -104,9 +121,12 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
{ "version", no_argument, NULL, ARG_VERSION },
|
||||
{ "no-pager", no_argument, NULL, ARG_NO_PAGER },
|
||||
{ "image", required_argument, NULL, 'i' },
|
||||
{ "machine", required_argument, NULL, 'M' },
|
||||
{ "qemu-smp", required_argument, NULL, ARG_QEMU_SMP },
|
||||
{ "qemu-mem", required_argument, NULL, ARG_QEMU_MEM },
|
||||
{ "qemu-kvm", required_argument, NULL, ARG_QEMU_KVM },
|
||||
{ "qemu-vsock", required_argument, NULL, ARG_QEMU_VSOCK },
|
||||
{ "vsock-cid", required_argument, NULL, ARG_VSOCK_CID },
|
||||
{ "qemu-gui", no_argument, NULL, ARG_QEMU_GUI },
|
||||
{ "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
|
||||
{ "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
|
||||
@ -120,7 +140,7 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
assert(argv);
|
||||
|
||||
optind = 0;
|
||||
while ((c = getopt_long(argc, argv, "+hi:", options, NULL)) >= 0)
|
||||
while ((c = getopt_long(argc, argv, "+hi:M", options, NULL)) >= 0)
|
||||
switch (c) {
|
||||
case 'h':
|
||||
return help();
|
||||
@ -136,6 +156,20 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
arg_settings_mask |= SETTING_DIRECTORY;
|
||||
break;
|
||||
|
||||
case 'M':
|
||||
if (isempty(optarg))
|
||||
arg_machine = mfree(arg_machine);
|
||||
else {
|
||||
if (!hostname_is_valid(optarg, 0))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"Invalid machine name: %s", optarg);
|
||||
|
||||
r = free_and_strdup(&arg_machine, optarg);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
}
|
||||
break;
|
||||
|
||||
case ARG_NO_PAGER:
|
||||
arg_pager_flags |= PAGER_DISABLE;
|
||||
break;
|
||||
@ -158,6 +192,27 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
return log_error_errno(r, "Failed to parse --qemu-kvm=%s: %m", optarg);
|
||||
break;
|
||||
|
||||
case ARG_QEMU_VSOCK:
|
||||
r = parse_tristate(optarg, &arg_qemu_vsock);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse --qemu-vsock=%s: %m", optarg);
|
||||
break;
|
||||
|
||||
case ARG_VSOCK_CID: {
|
||||
unsigned cid;
|
||||
if (isempty(optarg))
|
||||
cid = VMADDR_CID_ANY;
|
||||
else {
|
||||
r = safe_atou_bounded(optarg, 3, UINT_MAX - 1, &cid);
|
||||
if (r == -ERANGE)
|
||||
return log_error_errno(r, "Invalid value for --vsock-cid=: %m");
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse --vsock-cid=%s: %m", optarg);
|
||||
}
|
||||
arg_vsock_cid = (uint64_t)cid;
|
||||
break;
|
||||
}
|
||||
|
||||
case ARG_QEMU_GUI:
|
||||
arg_qemu_gui = true;
|
||||
break;
|
||||
@ -208,11 +263,187 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int open_vsock(void) {
|
||||
_cleanup_close_ int vsock_fd = -EBADF;
|
||||
int r;
|
||||
static const union sockaddr_union bind_addr = {
|
||||
.vm.svm_family = AF_VSOCK,
|
||||
.vm.svm_cid = VMADDR_CID_ANY,
|
||||
.vm.svm_port = VMADDR_PORT_ANY,
|
||||
};
|
||||
|
||||
vsock_fd = socket(AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
|
||||
if (vsock_fd < 0)
|
||||
return log_error_errno(errno, "Failed to open AF_VSOCK socket: %m");
|
||||
|
||||
r = bind(vsock_fd, &bind_addr.sa, sizeof(bind_addr.vm));
|
||||
if (r < 0)
|
||||
return log_error_errno(errno, "Failed to bind to vsock to address %u:%u: %m", bind_addr.vm.svm_cid, bind_addr.vm.svm_port);
|
||||
|
||||
r = listen(vsock_fd, SOMAXCONN_DELUXE);
|
||||
if (r < 0)
|
||||
return log_error_errno(errno, "Failed to listen on vsock: %m");
|
||||
|
||||
return TAKE_FD(vsock_fd);
|
||||
}
|
||||
|
||||
/* sd-event IO callback: reads one notification message from an accepted vsock connection.
 *
 * The message is split on newlines/carriage returns into tags, which are handled as follows:
 *   READY=1        is forwarded via sd_notify()
 *   STATUS=...     is forwarded via sd_notifyf() with a "VM running: " prefix
 *   EXIT_STATUS=   is parsed as an integer into the int that 'userdata' points to
 *
 * Returns 0 when the message was handled or deliberately ignored, and a negative value if reading
 * failed or memory ran out. */
static int vmspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
        int *exit_status = ASSERT_PTR(userdata);
        char buf[NOTIFY_BUFFER_MAX+1];
        struct iovec iovec = {
                .iov_base = buf,
                .iov_len = sizeof(buf)-1, /* leave room for the trailing NUL added below */
        };
        struct msghdr msghdr = {
                .msg_iov = &iovec,
                .msg_iovlen = 1,
        };
        _cleanup_strv_free_ char **tags = NULL;
        const char *value = NULL;
        ssize_t n;
        int r;

        n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT);
        if (n < 0) {
                if (ERRNO_IS_NEG_TRANSIENT(n))
                        return 0;

                if (n == -EXFULL) {
                        log_warning_errno(n, "Got message with truncated control data, ignoring: %m");
                        return 0;
                }

                return log_warning_errno(n, "Couldn't read notification socket: %m");
        }

        if ((size_t) n >= sizeof(buf)) {
                log_warning("Received notify message exceeded maximum size. Ignoring.");
                return 0;
        }

        buf[n] = 0;

        tags = strv_split(buf, "\n\r");
        if (!tags)
                return log_oom();

        STRV_FOREACH(tag, tags)
                log_debug("Received tag %s from notify socket", *tag);

        if (strv_contains(tags, "READY=1")) {
                r = sd_notify(false, "READY=1\n");
                if (r < 0)
                        log_warning_errno(r, "Failed to send readiness notification, ignoring: %m");
        }

        /* NOTE(review): strv_find_startswith() presumably returns the text following the prefix - confirm */
        value = strv_find_startswith(tags, "STATUS=");
        if (value)
                (void) sd_notifyf(false, "STATUS=VM running: %s", value);

        value = strv_find_startswith(tags, "EXIT_STATUS=");
        if (value) {
                r = safe_atoi(value, exit_status);
                if (r < 0)
                        log_warning_errno(r, "Failed to parse exit status from %s, ignoring: %m", value);
        }

        /* we will only receive one message from each connection so disable this source once one is received */
        sd_event_source_disable_unref(source);

        return 0;
}
|
||||
|
||||
/* sd-event IO callback for the listening vsock socket: accepts one incoming connection and registers
 * a floating event source that reads the notification message from it via
 * vmspawn_dispatch_notify_fd().
 *
 * 'userdata' is passed through unchanged to the per-connection source.
 *
 * Returns 0 if the connection was set up or the event was ignored, and a negative value if the
 * per-connection event source could not be set up. */
static int vmspawn_dispatch_vsock_connections(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
        _cleanup_close_ int conn_fd = -EBADF;
        /* Declared after conn_fd on purpose: on error paths the source is released before the fd is closed. */
        _cleanup_(sd_event_source_unrefp) sd_event_source *conn_source = NULL;
        sd_event *event;
        int r;

        assert(userdata);

        if (revents != EPOLLIN) {
                log_warning("Got unexpected poll event for vsock fd.");
                return 0;
        }

        conn_fd = accept4(fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
        if (conn_fd < 0) {
                log_warning_errno(errno, "Failed to accept connection from vsock fd (%m), ignoring...");
                return 0;
        }

        event = sd_event_source_get_event(source);
        if (!event)
                return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to retrieve event from event source, exiting task");

        /* add a new task to read from the connection */
        r = sd_event_add_io(event, &conn_source, conn_fd, revents, vmspawn_dispatch_notify_fd, userdata);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate notify connection event source: %m");

        /* sd_event_add_io() does not take ownership of the fd by itself. Hand it over explicitly, so
         * that the connection is closed when the source is freed, instead of being leaked. */
        r = sd_event_source_set_io_fd_own(conn_source, true);
        if (r < 0)
                return log_error_errno(r, "Failed to pass ownership of connection fd to event source: %m");

        /* conn_fd is now owned by the event source so don't clean it up */
        TAKE_FD(conn_fd);

        /* Make the source floating, so that it stays alive after our local reference is dropped on
         * return, until vmspawn_dispatch_notify_fd() releases it. */
        r = sd_event_source_set_floating(conn_source, true);
        if (r < 0)
                return log_error_errno(r, "Failed to make notify connection event source floating: %m");

        return 0;
}
|
||||
|
||||
/* Registers the listening vsock fd with the event loop, so that incoming connections are handled by
 * vmspawn_dispatch_vsock_connections(). 'exit_status' is handed to that callback as userdata, and the
 * new source is returned in 'notify_event_source'.
 *
 * Returns 0 on success, or the (logged) error from sd_event_add_io(). */
static int setup_notify_parent(sd_event *event, int fd, int *exit_status, sd_event_source **notify_event_source) {
        int r = sd_event_add_io(event, notify_event_source, fd, EPOLLIN, vmspawn_dispatch_vsock_connections, exit_status);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate notify socket event source: %m");

        /* The description is informational only, so failure to set it is ignored. */
        (void) sd_event_source_set_description(*notify_event_source, "vmspawn-notify-sock");

        return 0;
}
|
||||
|
||||
/* Signal callback (installed for SIGINT/SIGTERM): shuts down the qemu child whose PID is stored in
 * 'userdata'.
 *
 * If a PID is stored and the child could be signalled, the userdata is reset to NULL and the event
 * loop keeps running. Otherwise the event loop is asked to exit with code 0.
 * NOTE(review): presumably PTR_TO_PID(NULL) yields 0, so that a repeated signal takes the exit path - confirm.
 *
 * Always returns 0. */
static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        pid_t pid = PTR_TO_PID(userdata);

        /* TODO: actually talk to qemu and ask the guest to shutdown here */
        if (pid > 0 && kill(pid, SIGKILL) >= 0) {
                log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
                sd_event_source_set_userdata(s, NULL);
                return 0;
        }

        sd_event_exit(sd_event_source_get_event(s), 0);
        return 0;
}
|
||||
|
||||
/* Child callback: once the watched child has exited, ask the event loop the source belongs to to
 * terminate with exit code 0. The child's own exit information in 'si' is not looked at. */
static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
        sd_event *loop = sd_event_source_get_event(s);

        sd_event_exit(loop, 0);

        return 0;
}
|
||||
|
||||
static int cmdline_add_vsock(char ***cmdline, int vsock_fd) {
|
||||
int r;
|
||||
|
||||
r = strv_extend(cmdline, "-smbios");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
union sockaddr_union addr;
|
||||
socklen_t addr_len = sizeof addr.vm;
|
||||
r = getsockname(vsock_fd, &addr.sa, &addr_len);
|
||||
if (r < 0)
|
||||
return -errno;
|
||||
assert(addr_len >= sizeof addr.vm);
|
||||
assert(addr.vm.svm_family == AF_VSOCK);
|
||||
|
||||
log_info("Using vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port);
|
||||
r = strv_extendf(cmdline, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int run_virtual_machine(void) {
|
||||
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
|
||||
_cleanup_strv_free_ char **cmdline = NULL;
|
||||
_cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *kcl = NULL;
|
||||
_cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL;
|
||||
int r;
|
||||
_cleanup_close_ int vsock_fd = -EBADF;
|
||||
|
||||
bool use_kvm = arg_qemu_kvm > 0;
|
||||
if (arg_qemu_kvm < 0) {
|
||||
@ -232,11 +463,10 @@ static int run_virtual_machine(void) {
|
||||
"falling back to OVMF firmware blobs without Secure Boot support.");
|
||||
|
||||
const char *accel = use_kvm ? "kvm" : "tcg";
|
||||
#ifdef __aarch64__
|
||||
if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
|
||||
machine = strjoin("type=virt,accel=", accel);
|
||||
#else
|
||||
else
|
||||
machine = strjoin("type=q35,accel=", accel, ",smm=", on_off(ovmf_config->supports_sb));
|
||||
#endif
|
||||
if (!machine)
|
||||
return log_oom();
|
||||
|
||||
@ -256,9 +486,43 @@ static int run_virtual_machine(void) {
|
||||
"-m", mem,
|
||||
"-object", "rng-random,filename=/dev/urandom,id=rng0",
|
||||
"-device", "virtio-rng-pci,rng=rng0,id=rng-device0",
|
||||
"-nic", "user,model=virtio-net-pci",
|
||||
"-cpu", "max"
|
||||
"-nic", "user,model=virtio-net-pci"
|
||||
);
|
||||
if (!cmdline)
|
||||
return log_oom();
|
||||
|
||||
bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
|
||||
if (arg_qemu_vsock < 0) {
|
||||
r = qemu_check_vsock_support();
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to check for VSock support: %m");
|
||||
|
||||
use_vsock = r;
|
||||
}
|
||||
|
||||
unsigned child_cid = VMADDR_CID_ANY;
|
||||
_cleanup_close_ int child_vsock_fd = -EBADF;
|
||||
if (use_vsock) {
|
||||
if (arg_vsock_cid < UINT_MAX)
|
||||
child_cid = (unsigned)arg_vsock_cid;
|
||||
|
||||
r = vsock_fix_child_cid(&child_cid, arg_machine, &child_vsock_fd);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to fix CID for the guest vsock socket: %m");
|
||||
|
||||
r = strv_extend(&cmdline, "-device");
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
log_debug("vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd);
|
||||
r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
}
|
||||
|
||||
r = strv_extend_strv(&cmdline, STRV_MAKE("-cpu", "max"), /* filter_duplicates= */ false);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
if (arg_qemu_gui) {
|
||||
r = strv_extend_strv(&cmdline, STRV_MAKE("-vga", "virtio"), /* filter_duplicates= */ false);
|
||||
@ -271,12 +535,12 @@ static int run_virtual_machine(void) {
|
||||
"-chardev", "stdio,mux=on,id=console,signal=off",
|
||||
"-serial", "chardev:console",
|
||||
"-mon", "console"
|
||||
), false);
|
||||
), /* filter_duplicates= */ false);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
}
|
||||
|
||||
#if ARCHITECTURE_SUPPORTS_SMBIOS
|
||||
if (ARCHITECTURE_SUPPORTS_SMBIOS) {
|
||||
ssize_t n;
|
||||
FOREACH_ARRAY(cred, arg_credentials, arg_n_credentials) {
|
||||
_cleanup_free_ char *cred_data_b64 = NULL;
|
||||
@ -293,7 +557,7 @@ static int run_virtual_machine(void) {
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
r = strv_extend(&cmdline, "-drive");
|
||||
if (r < 0)
|
||||
@ -303,9 +567,9 @@ static int run_virtual_machine(void) {
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
_cleanup_(unlink_and_freep) char *ovmf_vars_to = NULL;
|
||||
if (ovmf_config->supports_sb) {
|
||||
const char *ovmf_vars_from = ovmf_config->vars;
|
||||
_cleanup_free_ char *ovmf_vars_to = NULL;
|
||||
_cleanup_close_ int source_fd = -EBADF, target_fd = -EBADF;
|
||||
|
||||
r = tempfn_random_child(NULL, "vmspawn-", &ovmf_vars_to);
|
||||
@ -333,7 +597,7 @@ static int run_virtual_machine(void) {
|
||||
"-global", "ICH9-LPC.disable_s3=1",
|
||||
"-global", "driver=cfi.pflash01,property=secure,value=on",
|
||||
"-drive"
|
||||
), false);
|
||||
), /* filter_duplicates= */ false);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
@ -353,13 +617,13 @@ static int run_virtual_machine(void) {
|
||||
r = strv_extend_strv(&cmdline, STRV_MAKE(
|
||||
"-device", "virtio-scsi-pci,id=scsi",
|
||||
"-device", "scsi-hd,drive=mkosi,bootindex=1"
|
||||
), false);
|
||||
), /* filter_duplicates= */ false);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
if (strv_length(arg_parameters) != 0) {
|
||||
#if ARCHITECTURE_SUPPORTS_SMBIOS
|
||||
kcl = strv_join(arg_parameters, " ");
|
||||
if (!strv_isempty(arg_parameters)) {
|
||||
if (ARCHITECTURE_SUPPORTS_SMBIOS) {
|
||||
_cleanup_free_ char *kcl = strv_join(arg_parameters, " ");
|
||||
if (!kcl)
|
||||
return log_oom();
|
||||
|
||||
@ -370,13 +634,39 @@ static int run_virtual_machine(void) {
|
||||
r = strv_extendf(&cmdline, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", kcl);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
#else
|
||||
} else
|
||||
log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS");
|
||||
#endif
|
||||
}
|
||||
|
||||
if (use_vsock) {
|
||||
vsock_fd = open_vsock();
|
||||
if (vsock_fd < 0)
|
||||
return log_error_errno(vsock_fd, "Failed to open vsock: %m");
|
||||
|
||||
r = cmdline_add_vsock(&cmdline, vsock_fd);
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to call getsockname on vsock: %m");
|
||||
}
|
||||
|
||||
_cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
|
||||
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
|
||||
r = sd_event_new(&event);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to get default event source: %m");
|
||||
|
||||
(void) sd_event_set_watchdog(event, true);
|
||||
|
||||
pid_t child_pid;
|
||||
r = safe_fork(qemu_binary, 0, &child_pid);
|
||||
r = safe_fork_full(
|
||||
qemu_binary,
|
||||
NULL,
|
||||
&child_vsock_fd, 1, /* pass the vsock fd to qemu */
|
||||
FORK_CLOEXEC_OFF,
|
||||
&child_pid);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to fork off %s: %m", qemu_binary);
|
||||
if (r == 0) {
|
||||
/* set TERM and LANG if they are missing */
|
||||
if (setenv("TERM", "vt220", 0) < 0)
|
||||
@ -390,7 +680,64 @@ static int run_virtual_machine(void) {
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return wait_for_terminate_and_check(qemu_binary, child_pid, WAIT_LOG);
|
||||
|
||||
int exit_status = INT_MAX;
|
||||
if (use_vsock) {
|
||||
r = setup_notify_parent(event, vsock_fd, &exit_status, ¬ify_event_source);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to setup event loop to handle vsock notify events: %m");
|
||||
}
|
||||
|
||||
/* shutdown qemu when we are shutdown */
|
||||
(void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(child_pid));
|
||||
(void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(child_pid));
|
||||
|
||||
(void) sd_event_add_signal(event, NULL, SIGRTMIN+18, sigrtmin18_handler, NULL);
|
||||
|
||||
/* Exit when the child exits */
|
||||
(void) sd_event_add_child(event, NULL, child_pid, WEXITED, on_child_exit, NULL);
|
||||
|
||||
r = sd_event_loop(event);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to run event loop: %m");
|
||||
|
||||
if (use_vsock) {
|
||||
if (exit_status == INT_MAX) {
|
||||
log_debug("Couldn't retrieve inner EXIT_STATUS from vsock");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
if (exit_status != 0)
|
||||
log_warning("Non-zero exit code received: %d", exit_status);
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int determine_names(void) {
|
||||
int r;
|
||||
|
||||
if (!arg_image)
|
||||
return log_error_errno(SYNTHETIC_ERRNO(-EINVAL), "Missing required argument -i/--image=, quitting");
|
||||
|
||||
if (!arg_machine) {
|
||||
char *e;
|
||||
|
||||
r = path_extract_filename(arg_image, &arg_machine);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
|
||||
|
||||
/* Truncate suffix if there is one */
|
||||
e = endswith(arg_machine, ".raw");
|
||||
if (e)
|
||||
*e = 0;
|
||||
|
||||
hostname_cleanup(arg_machine);
|
||||
if (!hostname_is_valid(arg_machine, 0))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine machine name automatically, please use -M.");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int run(int argc, char *argv[]) {
|
||||
@ -402,12 +749,15 @@ static int run(int argc, char *argv[]) {
|
||||
if (r <= 0)
|
||||
goto finish;
|
||||
|
||||
if (!arg_image) {
|
||||
log_error("Missing required argument -i/--image, quitting");
|
||||
r = determine_names();
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
}
|
||||
|
||||
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, SIGRTMIN+18, -1) >= 0);
|
||||
|
||||
r = run_virtual_machine();
|
||||
if (r > 0)
|
||||
ret = r;
|
||||
finish:
|
||||
machine_credential_free_all(arg_credentials, arg_n_credentials);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user