1
0
mirror of https://github.com/systemd/systemd.git synced 2025-02-28 05:57:33 +03:00

util-lib/systemd-run: implement race-free PTY peer opening (#34953)

This makes use of the new TIOCGPTPEER pty ioctl() for directly opening a
PTY peer, without going via path names. This is nice because it closes a
race around allocating and opening the peer. And also has the nice
benefit that if we acquired an fd originating from some other
namespace/container, we can directly derive the peer fd from it, without
having to reenter the namespace again.
This commit is contained in:
Luca Boccassi 2024-11-01 11:29:19 +00:00 committed by GitHub
commit fdccba15be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 165 additions and 77 deletions

View File

@ -323,7 +323,6 @@ int show_menu(char **x, unsigned n_columns, unsigned width, unsigned percentage)
int open_terminal(const char *name, int mode) {
_cleanup_close_ int fd = -EBADF;
unsigned c = 0;
/*
* If a TTY is in the process of being closed opening it might cause EIO. This is horribly awful, but
@ -333,10 +332,9 @@ int open_terminal(const char *name, int mode) {
* https://bugs.launchpad.net/ubuntu/+source/linux/+bug/554172/comments/245
*/
if ((mode & (O_CREAT|O_PATH|O_DIRECTORY|O_TMPFILE)) != 0)
return -EINVAL;
assert((mode & (O_CREAT|O_PATH|O_DIRECTORY|O_TMPFILE)) == 0);
for (;;) {
for (unsigned c = 0;; c++) {
fd = open(name, mode, 0);
if (fd >= 0)
break;
@ -346,10 +344,9 @@ int open_terminal(const char *name, int mode) {
/* Max 1s in total */
if (c >= 20)
return -errno;
return -EIO;
(void) usleep_safe(50 * USEC_PER_MSEC);
c++;
}
if (!isatty_safe(fd))
@ -1296,16 +1293,16 @@ int ptsname_malloc(int fd, char **ret) {
}
}
int openpt_allocate(int flags, char **ret_slave) {
int openpt_allocate(int flags, char **ret_peer_path) {
_cleanup_close_ int fd = -EBADF;
_cleanup_free_ char *p = NULL;
int r;
fd = posix_openpt(flags|O_NOCTTY|O_CLOEXEC);
if (fd < 0)
return -errno;
if (ret_slave) {
_cleanup_free_ char *p = NULL;
if (ret_peer_path) {
r = ptsname_malloc(fd, &p);
if (r < 0)
return r;
@ -1317,20 +1314,22 @@ int openpt_allocate(int flags, char **ret_slave) {
if (unlockpt(fd) < 0)
return -errno;
if (ret_slave)
*ret_slave = TAKE_PTR(p);
if (ret_peer_path)
*ret_peer_path = TAKE_PTR(p);
return TAKE_FD(fd);
}
static int ptsname_namespace(int pty, char **ret) {
int no = -1, r;
int no = -1;
assert(pty >= 0);
assert(ret);
/* Like ptsname(), but doesn't assume that the path is
* accessible in the local namespace. */
r = ioctl(pty, TIOCGPTN, &no);
if (r < 0)
if (ioctl(pty, TIOCGPTN, &no) < 0)
return -errno;
if (no < 0)
@ -1342,10 +1341,9 @@ static int ptsname_namespace(int pty, char **ret) {
return 0;
}
int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) {
int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_peer_path) {
_cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF, fd = -EBADF;
_cleanup_close_pair_ int pair[2] = EBADF_PAIR;
pid_t child;
int r;
assert(pid > 0);
@ -1354,17 +1352,27 @@ int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) {
if (r < 0)
return r;
if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair) < 0)
return -errno;
r = namespace_fork("(sd-openptns)", "(sd-openpt)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL,
pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
r = namespace_fork(
"(sd-openptns)",
"(sd-openpt)",
/* except_fds= */ NULL,
/* n_except_fds= */ 0,
FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_WAIT,
pidnsfd,
mntnsfd,
/* netns_fd= */ -EBADF,
usernsfd,
rootfd,
/* ret_pid= */ NULL);
if (r < 0)
return r;
if (r == 0) {
pair[0] = safe_close(pair[0]);
fd = openpt_allocate(flags, NULL);
fd = openpt_allocate(flags, /* ret_peer_path= */ NULL);
if (fd < 0)
_exit(EXIT_FAILURE);
@ -1376,18 +1384,12 @@ int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) {
pair[1] = safe_close(pair[1]);
r = wait_for_terminate_and_check("(sd-openptns)", child, 0);
if (r < 0)
return r;
if (r != EXIT_SUCCESS)
return -EIO;
fd = receive_one_fd(pair[0], 0);
if (fd < 0)
return fd;
if (ret_slave) {
r = ptsname_namespace(fd, ret_slave);
if (ret_peer_path) {
r = ptsname_namespace(fd, ret_peer_path);
if (r < 0)
return r;
}
@ -1398,30 +1400,40 @@ int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) {
int open_terminal_in_namespace(pid_t pid, const char *name, int mode) {
_cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF;
_cleanup_close_pair_ int pair[2] = EBADF_PAIR;
pid_t child;
int r;
r = namespace_open(pid, &pidnsfd, &mntnsfd, /* ret_netns_fd = */ NULL, &usernsfd, &rootfd);
assert(pid > 0);
assert(name);
r = namespace_open(pid, &pidnsfd, &mntnsfd, /* ret_netns_fd= */ NULL, &usernsfd, &rootfd);
if (r < 0)
return r;
if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair) < 0)
return -errno;
r = namespace_fork("(sd-terminalns)", "(sd-terminal)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL,
pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
r = namespace_fork(
"(sd-terminalns)",
"(sd-terminal)",
/* except_fds= */ NULL,
/* n_except_fds= */ 0,
FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_WAIT,
pidnsfd,
mntnsfd,
/* netnsd_fd= */ -EBADF,
usernsfd,
rootfd,
/* ret_pid= */ NULL);
if (r < 0)
return r;
if (r == 0) {
int master;
pair[0] = safe_close(pair[0]);
master = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC);
if (master < 0)
int pty_fd = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC);
if (pty_fd < 0)
_exit(EXIT_FAILURE);
if (send_one_fd(pair[1], master, 0) < 0)
if (send_one_fd(pair[1], pty_fd, 0) < 0)
_exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
@ -1429,12 +1441,6 @@ int open_terminal_in_namespace(pid_t pid, const char *name, int mode) {
pair[1] = safe_close(pair[1]);
r = wait_for_terminate_and_check("(sd-terminalns)", child, 0);
if (r < 0)
return r;
if (r != EXIT_SUCCESS)
return -EIO;
return receive_one_fd(pair[0], 0);
}
@ -2278,3 +2284,60 @@ int terminal_is_pty_fd(int fd) {
return true;
}
int pty_open_peer_racefree(int fd, int mode) {
assert(fd >= 0);
/* Opens the peer PTY using the new race-free TIOCGPTPEER ioctl() (kernel 4.13).
*
* This is safe to be called on TTYs from other namespaces. */
assert((mode & (O_CREAT|O_PATH|O_DIRECTORY|O_TMPFILE)) == 0);
/* This replicates the EIO retry logic of open_terminal() in a modified way. */
for (unsigned c = 0;; c++) {
int peer_fd = ioctl(fd, TIOCGPTPEER, mode);
if (peer_fd >= 0)
return peer_fd;
if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EINVAL) /* new ioctl() is not supported, return a clear error */
return -EOPNOTSUPP;
if (errno != EIO)
return -errno;
/* Max 1s in total */
if (c >= 20)
return -EIO;
(void) usleep_safe(50 * USEC_PER_MSEC);
}
}
int pty_open_peer(int fd, int mode) {
int r;
assert(fd >= 0);
/* Opens the peer PTY using the new race-free TIOCGPTPEER ioctl() (kernel 4.13) if it is
* available. Otherwise falls back to the POSIX ptsname() + open() logic.
*
* Because of the fallback path this is not safe to be called on PTYs from other namespaces. (Because
* we open the peer PTY name there via a path in the file system.) */
// TODO: Remove fallback path once baseline is updated to >= 4.13, i.e. systemd v258
int peer_fd = pty_open_peer_racefree(fd, mode);
if (peer_fd >= 0)
return peer_fd;
if (!ERRNO_IS_NEG_NOT_SUPPORTED(peer_fd))
return peer_fd;
/* The racy fallback path */
_cleanup_free_ char *peer_path = NULL;
r = ptsname_malloc(fd, &peer_path);
if (r < 0)
return r;
return open_terminal(peer_path, mode);
}

View File

@ -154,3 +154,6 @@ int terminal_get_size_by_dsr(int input_fd, int output_fd, unsigned *ret_rows, un
int terminal_fix_size(int input_fd, int output_fd);
int terminal_is_pty_fd(int fd);
int pty_open_peer_racefree(int fd, int mode);
int pty_open_peer(int fd, int mode);

View File

@ -407,7 +407,7 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
_cleanup_strv_free_ char **env = NULL, **args_wire = NULL, **args = NULL;
_cleanup_free_ char *command_line = NULL;
Machine *m = ASSERT_PTR(userdata);
const char *p, *unit, *user, *path, *description, *utmp_id;
const char *unit, *user, *path, *description, *utmp_id;
int r;
assert(message);
@ -484,10 +484,11 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
if (master < 0)
return master;
p = path_startswith(pty_name, "/dev/pts/");
assert(p);
slave = machine_open_terminal(m, pty_name, O_RDWR|O_NOCTTY|O_CLOEXEC);
/* First try to get an fd for the PTY peer via the new racefree ioctl(), directly. Otherwise go via
* joining the namespace, because it goes by path */
slave = pty_open_peer_racefree(master, O_RDWR|O_NOCTTY|O_CLOEXEC);
if (ERRNO_IS_NEG_NOT_SUPPORTED(slave))
slave = machine_open_terminal(m, pty_name, O_RDWR|O_NOCTTY|O_CLOEXEC);
if (slave < 0)
return slave;
@ -505,6 +506,8 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
return r;
/* Name and mode */
const char *p = ASSERT_PTR(path_startswith(pty_name, "/dev/pts/"));
unit = strjoina("container-shell@", p, ".service");
r = sd_bus_message_append(tm, "ss", unit, "fail");
if (r < 0)

View File

@ -1858,11 +1858,11 @@ static void set_window_title(PTYForward *f) {
(void) pty_forward_set_title_prefix(f, dot);
}
static int chown_to_capsule(const char *path, const char *capsule) {
static int fchown_to_capsule(int fd, const char *capsule) {
_cleanup_free_ char *p = NULL;
int r;
assert(path);
assert(fd >= 0);
assert(capsule);
p = path_join("/run/capsules/", capsule);
@ -1877,7 +1877,7 @@ static int chown_to_capsule(const char *path, const char *capsule) {
if (uid_is_system(st.st_uid) || gid_is_system(st.st_gid)) /* paranoid safety check */
return -EPERM;
return chmod_and_chown(path, 0600, st.st_uid, st.st_gid);
return fchmod_and_chown(fd, 0600, st.st_uid, st.st_gid);
}
static int print_unit_invocation(const char *unit, sd_id128_t invocation_id) {
@ -1912,7 +1912,7 @@ static int start_transient_service(sd_bus *bus) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
_cleanup_free_ char *service = NULL, *pty_path = NULL;
_cleanup_close_ int master = -EBADF, slave = -EBADF;
_cleanup_close_ int pty_fd = -EBADF, peer_fd = -EBADF;
int r;
assert(bus);
@ -1922,29 +1922,22 @@ static int start_transient_service(sd_bus *bus) {
if (arg_stdio == ARG_STDIO_PTY) {
if (IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_CAPSULE)) {
master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
if (master < 0)
return log_error_errno(errno, "Failed to acquire pseudo tty: %m");
pty_fd = openpt_allocate(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK, &pty_path);
if (pty_fd < 0)
return log_error_errno(pty_fd, "Failed to acquire pseudo tty: %m");
r = ptsname_malloc(master, &pty_path);
if (r < 0)
return log_error_errno(r, "Failed to determine tty name: %m");
peer_fd = pty_open_peer(pty_fd, O_RDWR|O_NOCTTY|O_CLOEXEC);
if (peer_fd < 0)
return log_error_errno(peer_fd, "Failed to open pty peer: %m");
if (arg_transport == BUS_TRANSPORT_CAPSULE) {
/* If we are in capsule mode, we must give the capsule UID/GID access to the PTY we just allocated first. */
r = chown_to_capsule(pty_path, arg_host);
r = fchown_to_capsule(peer_fd, arg_host);
if (r < 0)
return log_error_errno(r, "Failed to chown tty to capsule UID/GID: %m");
}
if (unlockpt(master) < 0)
return log_error_errno(errno, "Failed to unlock tty: %m");
slave = open_terminal(pty_path, O_RDWR|O_NOCTTY|O_CLOEXEC);
if (slave < 0)
return log_error_errno(slave, "Failed to open pty slave: %m");
} else if (arg_transport == BUS_TRANSPORT_MACHINE) {
_cleanup_(sd_bus_unrefp) sd_bus *system_bus = NULL;
_cleanup_(sd_bus_message_unrefp) sd_bus_message *pty_reply = NULL;
@ -1965,18 +1958,25 @@ static int start_transient_service(sd_bus *bus) {
if (r < 0)
return log_error_errno(r, "Failed to get machine PTY: %s", bus_error_message(&error, r));
r = sd_bus_message_read(pty_reply, "hs", &master, &s);
r = sd_bus_message_read(pty_reply, "hs", &pty_fd, &s);
if (r < 0)
return bus_log_parse_error(r);
master = fcntl(master, F_DUPFD_CLOEXEC, 3);
if (master < 0)
pty_fd = fcntl(pty_fd, F_DUPFD_CLOEXEC, 3);
if (pty_fd < 0)
return log_error_errno(errno, "Failed to duplicate master fd: %m");
pty_path = strdup(s);
if (!pty_path)
return log_oom();
peer_fd = pty_open_peer_racefree(pty_fd, O_RDWR|O_NOCTTY|O_CLOEXEC);
if (ERRNO_IS_NEG_NOT_SUPPORTED(peer_fd))
log_debug_errno(r, "TIOCGPTPEER ioctl not available, falling back to race-ful PTY peer opening: %m");
/* We do not open the peer_fd in this case, we let systemd on the remote side open it instead */
else if (peer_fd < 0)
return log_debug_errno(peer_fd, "Failed to open PTY peer: %m");
// FIXME: Introduce OpenMachinePTYEx() that accepts ownership/permission as param
// and additionally returns the pty fd, for #33216 and #32999
} else
@ -2005,10 +2005,10 @@ static int start_transient_service(sd_bus *bus) {
return r;
}
r = make_transient_service_unit(bus, &m, service, pty_path, slave);
r = make_transient_service_unit(bus, &m, service, pty_path, peer_fd);
if (r < 0)
return r;
slave = safe_close(slave);
peer_fd = safe_close(peer_fd);
r = bus_call_with_hint(bus, m, "service", &reply);
if (r < 0)
@ -2066,13 +2066,13 @@ static int start_transient_service(sd_bus *bus) {
if (!c.bus_path)
return log_oom();
if (master >= 0) {
if (pty_fd >= 0) {
(void) sd_event_set_signal_exit(c.event, true);
if (!arg_quiet)
log_info("Press ^] three times within 1s to disconnect TTY.");
r = pty_forward_new(c.event, master, PTY_FORWARD_IGNORE_INITIAL_VHANGUP, &c.forward);
r = pty_forward_new(c.event, pty_fd, PTY_FORWARD_IGNORE_INITIAL_VHANGUP, &c.forward);
if (r < 0)
return log_error_errno(r, "Failed to create PTY forwarder: %m");

View File

@ -216,14 +216,13 @@ TEST(terminal_fix_size) {
TEST(terminal_is_pty_fd) {
_cleanup_close_ int fd1 = -EBADF, fd2 = -EBADF;
_cleanup_free_ char *peer = NULL;
int r;
fd1 = openpt_allocate(O_RDWR, &peer);
fd1 = openpt_allocate(O_RDWR, /* ret_peer_path= */ NULL);
assert_se(fd1 >= 0);
assert_se(terminal_is_pty_fd(fd1) > 0);
fd2 = open_terminal(peer, O_RDWR|O_CLOEXEC|O_NOCTTY);
fd2 = pty_open_peer(fd1, O_RDWR|O_CLOEXEC|O_NOCTTY);
assert_se(fd2 >= 0);
assert_se(terminal_is_pty_fd(fd2) > 0);
@ -295,4 +294,24 @@ TEST(terminal_reset_defensive) {
log_notice_errno(r, "Failed to reset terminal: %m");
}
TEST(pty_open_peer) {
_cleanup_close_ int pty_fd = -EBADF, peer_fd = -EBADF;
_cleanup_free_ char *pty_path = NULL;
pty_fd = openpt_allocate(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK, &pty_path);
assert(pty_fd >= 0);
assert(pty_path);
peer_fd = pty_open_peer(pty_fd, O_RDWR|O_NOCTTY|O_CLOEXEC);
assert(peer_fd >= 0);
static const char x[] = { 'x', '\n' };
assert(write(pty_fd, x, sizeof(x)) == 2);
char buf[3];
assert(read(peer_fd, &buf, sizeof(buf)) == sizeof(x));
assert(buf[0] == x[0]);
assert(buf[1] == x[1]);
}
DEFINE_TEST_MAIN(LOG_INFO);