seccomp: Support atomic "addfd + send reply"
Alban Crequy reported a race condition userspace faces when we want to add some fds and make the syscall return them[1] using seccomp notify. The problem is that currently two different ioctl() calls are needed by the process handling the syscalls (agent) for another userspace process (target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible for the agent to do the first ioctl to add a file descriptor but the target is interrupted (EINTR) before the agent does the second ioctl() call. This patch adds a flag to the ADDFD ioctl() so it adds the fd and returns that value atomically to the target program, as suggested by Kees Cook[2]. This is done by simply allowing seccomp_do_user_notification() to add the fd and return it in this case. Therefore, in this case the target wakes up from the wait in seccomp_do_user_notification() either to interrupt the syscall or to add the fd and return it. This "allocate an fd and return" functionality is useful for syscalls that return a file descriptor only, like connect(2). Other syscalls that return a file descriptor but not as return value (or return more than one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not work with this flag. This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and SECCOMP_IOCTL_NOTIF_SEND into an atomic opteration. The notification's return value, nor error can be set by the user. Upon successful invocation of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND flag, the notifying process's errno will be 0, and the return value will be the file descriptor number that was installed. [1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxta-KQ@mail.gmail.com/ [2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/ Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io> Signed-off-by: Sargun Dhillon <sargun@sargun.me> Acked-by: Tycho Andersen <tycho@tycho.pizza> Acked-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Kees Cook <keescook@chromium.org> Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me
This commit is contained in:
parent
ddc4739169
commit
0ae71c7720
@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
|
|||||||
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
|
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
|
||||||
be the same ``id`` as in ``struct seccomp_notif``.
|
be the same ``id`` as in ``struct seccomp_notif``.
|
||||||
|
|
||||||
|
Userspace can also add file descriptors to the notifying process via
|
||||||
|
``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
|
||||||
|
``struct seccomp_notif_addfd`` should be the same ``id`` as in
|
||||||
|
``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
|
||||||
|
like O_EXEC on the file descriptor in the notifying process. If the supervisor
|
||||||
|
wants to inject the file descriptor with a specific number, the
|
||||||
|
``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
|
||||||
|
the specific number to use. If that file descriptor is already open in the
|
||||||
|
notifying process it will be replaced. The supervisor can also add an FD, and
|
||||||
|
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
|
||||||
|
value will be the injected file descriptor number.
|
||||||
|
|
||||||
It is worth noting that ``struct seccomp_data`` contains the values of register
|
It is worth noting that ``struct seccomp_data`` contains the values of register
|
||||||
arguments to the syscall, but does not contain pointers to memory. The task's
|
arguments to the syscall, but does not contain pointers to memory. The task's
|
||||||
memory is accessible to suitably privileged traces via ``ptrace()`` or
|
memory is accessible to suitably privileged traces via ``ptrace()`` or
|
||||||
|
@ -115,6 +115,7 @@ struct seccomp_notif_resp {
|
|||||||
|
|
||||||
/* valid flags for seccomp_notif_addfd */
|
/* valid flags for seccomp_notif_addfd */
|
||||||
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
|
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
|
||||||
|
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct seccomp_notif_addfd
|
* struct seccomp_notif_addfd
|
||||||
|
@ -107,6 +107,7 @@ struct seccomp_knotif {
|
|||||||
* installing process should allocate the fd as normal.
|
* installing process should allocate the fd as normal.
|
||||||
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
|
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
|
||||||
* is allowed.
|
* is allowed.
|
||||||
|
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
|
||||||
* @ret: The return value of the installing process. It is set to the fd num
|
* @ret: The return value of the installing process. It is set to the fd num
|
||||||
* upon success (>= 0).
|
* upon success (>= 0).
|
||||||
* @completion: Indicates that the installing process has completed fd
|
* @completion: Indicates that the installing process has completed fd
|
||||||
@ -118,6 +119,7 @@ struct seccomp_kaddfd {
|
|||||||
struct file *file;
|
struct file *file;
|
||||||
int fd;
|
int fd;
|
||||||
unsigned int flags;
|
unsigned int flags;
|
||||||
|
__u32 ioctl_flags;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
bool setfd;
|
bool setfd;
|
||||||
@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
|
|||||||
return filter->notif->next_id++;
|
return filter->notif->next_id++;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
|
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
|
||||||
{
|
{
|
||||||
|
int fd;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove the notification, and reset the list pointers, indicating
|
* Remove the notification, and reset the list pointers, indicating
|
||||||
* that it has been handled.
|
* that it has been handled.
|
||||||
*/
|
*/
|
||||||
list_del_init(&addfd->list);
|
list_del_init(&addfd->list);
|
||||||
if (!addfd->setfd)
|
if (!addfd->setfd)
|
||||||
addfd->ret = receive_fd(addfd->file, addfd->flags);
|
fd = receive_fd(addfd->file, addfd->flags);
|
||||||
else
|
else
|
||||||
addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
|
fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
|
||||||
addfd->flags);
|
addfd->ret = fd;
|
||||||
|
|
||||||
|
if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
|
||||||
|
/* If we fail reset and return an error to the notifier */
|
||||||
|
if (fd < 0) {
|
||||||
|
n->state = SECCOMP_NOTIFY_SENT;
|
||||||
|
} else {
|
||||||
|
/* Return the FD we just added */
|
||||||
|
n->flags = 0;
|
||||||
|
n->error = 0;
|
||||||
|
n->val = fd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark the notification as completed. From this point, addfd mem
|
||||||
|
* might be invalidated and we can't safely read it anymore.
|
||||||
|
*/
|
||||||
complete(&addfd->completion);
|
complete(&addfd->completion);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
|
|||||||
struct seccomp_kaddfd, list);
|
struct seccomp_kaddfd, list);
|
||||||
/* Check if we were woken up by a addfd message */
|
/* Check if we were woken up by a addfd message */
|
||||||
if (addfd)
|
if (addfd)
|
||||||
seccomp_handle_addfd(addfd);
|
seccomp_handle_addfd(addfd, &n);
|
||||||
|
|
||||||
} while (n.state != SECCOMP_NOTIFY_REPLIED);
|
} while (n.state != SECCOMP_NOTIFY_REPLIED);
|
||||||
|
|
||||||
@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
|
|||||||
if (addfd.newfd_flags & ~O_CLOEXEC)
|
if (addfd.newfd_flags & ~O_CLOEXEC)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
|
if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
|
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
|
||||||
@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
|
|||||||
if (!kaddfd.file)
|
if (!kaddfd.file)
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
|
|
||||||
|
kaddfd.ioctl_flags = addfd.flags;
|
||||||
kaddfd.flags = addfd.newfd_flags;
|
kaddfd.flags = addfd.newfd_flags;
|
||||||
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
|
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
|
||||||
kaddfd.fd = addfd.newfd;
|
kaddfd.fd = addfd.newfd;
|
||||||
@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
|
|||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
|
||||||
|
/*
|
||||||
|
* Disallow queuing an atomic addfd + send reply while there are
|
||||||
|
* some addfd requests still to process.
|
||||||
|
*
|
||||||
|
* There is no clear reason to support it and allows us to keep
|
||||||
|
* the loop on the other side straight-forward.
|
||||||
|
*/
|
||||||
|
if (!list_empty(&knotif->addfd)) {
|
||||||
|
ret = -EBUSY;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allow exactly only one reply */
|
||||||
|
knotif->state = SECCOMP_NOTIFY_REPLIED;
|
||||||
|
}
|
||||||
|
|
||||||
list_add(&kaddfd.list, &knotif->addfd);
|
list_add(&kaddfd.list, &knotif->addfd);
|
||||||
complete(&knotif->ready);
|
complete(&knotif->ready);
|
||||||
mutex_unlock(&filter->notify_lock);
|
mutex_unlock(&filter->notify_lock);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user