seccomp updates for v6.6-rc1
- Provide USER_NOTIFY flag for synchronous mode (Andrei Vagin, Peter
  Oskolkov). This touches the scheduler and perf but has been Acked by
  Peter Zijlstra.

- Fix regression in syscall skipping and restart tracing on arm32.
  This touches arch/arm/ but has been Acked by Arnd Bergmann.

-----BEGIN PGP SIGNATURE-----

iQJKBAABCgA0FiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAmTs418WHGtlZXNjb29r
QGNocm9taXVtLm9yZwAKCRCJcvTf3G3AJohpD/4tEfRdnb/KDgwQ7uvqBonUJXcx
wqw17LZCGTpBV3/Tp3+aEseD1NezOxiMJL88VyUHSy7nfDJShbL6QtyoenwEOeXJ
HmBUfcIH3cqRutHEJ3drYBzBetpeeK2G+gTYVj+JoEfPWyPf+Egj+1JE2n1xLi92
WC1miBAyBZ59kN+D1hcDzJu24CkAwbcUYlEzGejN5lBOwxYV3/fjARBVRvefOO5m
jljSCIVJOFgCiybKhJ7Zw1+lkFc3cIlcOgr4/ZegSc8PxFVebnuImTHHp/gvoo6F
7d1xe5Hk+PSfNvVq41MAeRB2vK2tY5efwjXRarThUaydPTO43KiQm0dzP0EYWK9a
LcOg8zAXZnpvuWU5O2SqUKADcxe2TjS1WuQ/Q4ixxgKz2kJKDwrNU8Frf327eLSR
acfZgMMiUfEXyXDV9B3LzNAtwdvwyxYrzEzxgKywhThIhZmQDat0rI2IaTV5QIc5
pkxiFEe0TPwpzyUVO9dSzE+ughTmNQOKk5uAM9e2NwRwVdhEmlZAxo0kStJ1NoaA
yDjYIKfaNBElchL4v2931KJFJseI+uRaWdW10JEV+1M69+gEAEs6wbmAxtcYS776
xWsYp3slXzlmeVyvQp/ah8p0y55r+qTbcnhkvIdiwLYei4Bh3KOoJUlVmW0V5dKq
b+7qspIvBA0kKRAqPw==
=DI8R
-----END PGP SIGNATURE-----

Merge tag 'seccomp-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:

 - Provide USER_NOTIFY flag for synchronous mode (Andrei Vagin, Peter
   Oskolkov). This touches the scheduler and perf but has been Acked by
   Peter Zijlstra.

 - Fix regression in syscall skipping and restart tracing on arm32.
   This touches arch/arm/ but has been Acked by Arnd Bergmann.

* tag 'seccomp-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  seccomp: Add missing kerndoc notations
  ARM: ptrace: Restore syscall skipping for tracers
  ARM: ptrace: Restore syscall restart tracing
  selftests/seccomp: Handle arm32 corner cases better
  perf/benchmark: add a new benchmark for seccom_unotify
  selftest/seccomp: add a new test for the sync mode of seccomp_user_notify
  seccomp: add the synchronous mode for seccomp_unotify
  sched: add a few helpers to wake up tasks on the current cpu
  sched: add WF_CURRENT_CPU and externise ttwu
  seccomp: don't use semaphore and wait_queue together
commit b03a434214
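
Before the per-file diffs, here is a minimal userspace sketch of how a supervisor could opt its user-notification listener into the new synchronous wake-up mode. It mirrors the ioctl usage in the perf benchmark and selftest added below; enable_sync_wakeup is only an illustrative helper name, and the fallback defines are needed when building against pre-v6.6 UAPI headers.

/*
 * Sketch: enable SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP on a listener fd that
 * was obtained via seccomp(SECCOMP_SET_MODE_FILTER,
 * SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog).
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/seccomp.h>

/* Fallback definitions for older headers, as done in the selftest below. */
#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
#endif

static int enable_sync_wakeup(int listener)
{
	/* Ask the kernel to wake the waiting target on the current CPU. */
	if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
		  SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0)) {
		perror("SECCOMP_IOCTL_NOTIF_SET_FLAGS");
		return -1;
	}
	return 0;
}

With the flag set, SECCOMP_IOCTL_NOTIF_SEND wakes the blocked target via complete_on_current_cpu(), which is the path the sched-seccomp-notify benchmark below measures.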
arch/arm/include/asm/syscall.h
@@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task,
 	if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
 		return task_thread_info(task)->abi_syscall;
 
+	if (task_thread_info(task)->abi_syscall == -1)
+		return -1;
+
 	return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
 }
 

arch/arm/kernel/entry-common.S
@@ -90,6 +90,7 @@ slow_work_pending:
 	cmp	r0, #0
 	beq	no_work_pending
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
+	str	scno, [tsk, #TI_ABI_SYSCALL]	@ make sure tracers see update
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
 	b	local_restart			@ ... and off we go
 ENDPROC(ret_fast_syscall)

arch/arm/kernel/ptrace.c
@@ -783,8 +783,9 @@ long arch_ptrace(struct task_struct *child, long request,
 			break;
 
 		case PTRACE_SET_SYSCALL:
-			task_thread_info(child)->abi_syscall = data &
-							__NR_SYSCALL_MASK;
+			if (data != -1)
+				data &= __NR_SYSCALL_MASK;
+			task_thread_info(child)->abi_syscall = data;
 			ret = 0;
 			break;
 

include/linux/completion.h
@@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
 
 extern void complete(struct completion *);
+extern void complete_on_current_cpu(struct completion *x);
 extern void complete_all(struct completion *);
 
 #endif

include/linux/swait.h
@@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
 
 extern void swake_up_one(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
-extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
 
 extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);

include/linux/wait.h
@@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 }
 
 int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
 #define key_to_poll(m)		((__force __poll_t)(uintptr_t)(void *)(m))
 #define wake_up_poll(x, m)						\
 	__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
+#define wake_up_poll_on_current_cpu(x, m)				\
+	__wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
 #define wake_up_locked_poll(x, m)					\
 	__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
 #define wake_up_interruptible_poll(x, m)				\

include/uapi/linux/seccomp.h
@@ -115,6 +115,8 @@ struct seccomp_notif_resp {
 	__u32 flags;
 };
 
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+
 /* valid flags for seccomp_notif_addfd */
 #define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */
 #define SECCOMP_ADDFD_FLAG_SEND	(1UL << 1) /* Addfd and return it, atomically */
@@ -150,4 +152,6 @@ struct seccomp_notif_addfd {
 #define SECCOMP_IOCTL_NOTIF_ADDFD	SECCOMP_IOW(3, \
 						struct seccomp_notif_addfd)
 
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS	SECCOMP_IOW(4, __u64)
+
 #endif /* _UAPI_LINUX_SECCOMP_H */

kernel/sched/completion.c
@@ -13,6 +13,23 @@
  * Waiting for completion is a typically sync point, but not an exclusion point.
  */
 
+static void complete_with_flags(struct completion *x, int wake_flags)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&x->wait.lock, flags);
+
+	if (x->done != UINT_MAX)
+		x->done++;
+	swake_up_locked(&x->wait, wake_flags);
+	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void complete_on_current_cpu(struct completion *x)
+{
+	return complete_with_flags(x, WF_CURRENT_CPU);
+}
+
 /**
  * complete: - signals a single thread waiting on this completion
  * @x: holds the state of this particular completion
@@ -27,14 +44,7 @@
  */
 void complete(struct completion *x)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&x->wait.lock, flags);
-
-	if (x->done != UINT_MAX)
-		x->done++;
-	swake_up_locked(&x->wait);
-	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+	complete_with_flags(x, 0);
 }
 EXPORT_SYMBOL(complete);
 

kernel/sched/core.c
@@ -4193,8 +4193,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
  * Return: %true if @p->state changes (an actual wakeup was done),
  *	   %false otherwise.
  */
-static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
 	unsigned long flags;
 	int cpu, success = 0;
@@ -7030,7 +7029,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
 	return try_to_wake_up(curr->private, mode, wake_flags);
 }
 EXPORT_SYMBOL(default_wake_function);

kernel/sched/fair.c
@@ -7741,6 +7741,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	if (wake_flags & WF_TTWU) {
 		record_wakee(p);
 
+		if ((wake_flags & WF_CURRENT_CPU) &&
+		    cpumask_test_cpu(cpu, p->cpus_ptr))
+			return cpu;
+
 		if (sched_energy_enabled()) {
 			new_cpu = find_energy_efficient_cpu(p, prev_cpu);
 			if (new_cpu >= 0)

kernel/sched/sched.h
@@ -2131,12 +2131,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 }
 
 /* Wake flags. The first three directly map to some SD flag value */
-#define WF_EXEC     0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
-#define WF_FORK     0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
-#define WF_TTWU     0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
+#define WF_EXEC         0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
+#define WF_FORK         0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
+#define WF_TTWU         0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
 
-#define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
-#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+#define WF_SYNC         0x10 /* Waker goes to sleep after wakeup */
+#define WF_MIGRATED     0x20 /* Internal use, task got migrated */
+#define WF_CURRENT_CPU  0x40 /* Prefer to move the wakee to the current CPU. */
 
 #ifdef CONFIG_SMP
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -3229,6 +3230,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 extern void swake_up_all_locked(struct swait_queue_head *q);
 extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
+extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
+
 #ifdef CONFIG_PREEMPT_DYNAMIC
 extern int preempt_dynamic_mode;
 extern int sched_dynamic_mode(const char *str);

kernel/sched/swait.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(__init_swait_queue_head);
  * If for some reason it would return 0, that means the previously waiting
  * task is already running, so it will observe condition true (or has already).
  */
-void swake_up_locked(struct swait_queue_head *q)
+void swake_up_locked(struct swait_queue_head *q, int wake_flags)
 {
 	struct swait_queue *curr;
 
@@ -26,7 +26,7 @@ void swake_up_locked(struct swait_queue_head *q)
 		return;
 
 	curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
-	wake_up_process(curr->task);
+	try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
 	list_del_init(&curr->task_list);
 }
 EXPORT_SYMBOL(swake_up_locked);
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(swake_up_locked);
 void swake_up_all_locked(struct swait_queue_head *q)
 {
 	while (!list_empty(&q->task_list))
-		swake_up_locked(q);
+		swake_up_locked(q, 0);
 }
 
 void swake_up_one(struct swait_queue_head *q)
@@ -49,7 +49,7 @@ void swake_up_one(struct swait_queue_head *q)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&q->lock, flags);
-	swake_up_locked(q);
+	swake_up_locked(q, 0);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(swake_up_one);

kernel/sched/wait.c
@@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 }
 EXPORT_SYMBOL(__wake_up);
 
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
+{
+	__wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
+}
+
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */

kernel/seccomp.c
@@ -110,11 +110,13 @@ struct seccomp_knotif {
  * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
  *         is allowed.
  * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
+ * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
  * @ret: The return value of the installing process. It is set to the fd num
  *       upon success (>= 0).
  * @completion: Indicates that the installing process has completed fd
  *              installation, or gone away (either due to successful
  *              reply, or signal)
+ * @list: list_head for chaining seccomp_kaddfd together.
  *
  */
 struct seccomp_kaddfd {
@@ -138,14 +140,17 @@ struct seccomp_kaddfd {
  * structure is fairly large, we store the notification-specific stuff in a
  * separate structure.
  *
- * @request: A semaphore that users of this notification can wait on for
- *           changes. Actual reads and writes are still controlled with
- *           filter->notify_lock.
+ * @requests: A semaphore that users of this notification can wait on for
+ *            changes. Actual reads and writes are still controlled with
+ *            filter->notify_lock.
+ * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
  * @next_id: The id of the next request.
  * @notifications: A list of struct seccomp_knotif elements.
  */
+
 struct notification {
-	struct semaphore request;
+	atomic_t requests;
+	u32 flags;
 	u64 next_id;
 	struct list_head notifications;
 };
@@ -555,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
  *			  drop its reference count, and notify
  *			  about unused filters
  *
+ * @tsk: task the filter should be released from.
+ *
  * This function should only be called when the task is exiting as
  * it detaches it from its filter tree. As such, READ_ONCE() and
  * barriers are not needed here, as would normally be needed.
@@ -574,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk)
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
+ * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
+ *
  * Expects sighand and cred_guard_mutex locks to be held, and for
  * seccomp_can_sync_threads() to have returned success already
  * without dropping the locks.
@@ -1116,8 +1125,11 @@ static int seccomp_do_user_notification(int this_syscall,
 	list_add_tail(&n.list, &match->notif->notifications);
 	INIT_LIST_HEAD(&n.addfd);
 
-	up(&match->notif->request);
-	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
+	atomic_inc(&match->notif->requests);
+	if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
+	else
+		wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
 
 	/*
 	 * This is where we wait for a reply from userspace.
@@ -1450,6 +1462,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
 	return NULL;
 }
 
+static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
+				  void *key)
+{
+	/* Avoid a wakeup if event not interesting for us. */
+	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+
+static int recv_wait_event(struct seccomp_filter *filter)
+{
+	DEFINE_WAIT_FUNC(wait, recv_wake_function);
+	int ret;
+
+	if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+		return 0;
+
+	for (;;) {
+		ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
+
+		if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+			break;
+
+		if (ret)
+			return ret;
+
+		schedule();
+	}
+	finish_wait(&filter->wqh, &wait);
+	return 0;
+}
+
 static long seccomp_notify_recv(struct seccomp_filter *filter,
 				void __user *buf)
@@ -1467,7 +1510,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
 
 	memset(&unotif, 0, sizeof(unotif));
 
-	ret = down_interruptible(&filter->notif->request);
+	ret = recv_wait_event(filter);
 	if (ret < 0)
 		return ret;
 
@@ -1515,7 +1558,8 @@ out:
 		if (should_sleep_killable(filter, knotif))
 			complete(&knotif->ready);
 		knotif->state = SECCOMP_NOTIFY_INIT;
-		up(&filter->notif->request);
+		atomic_inc(&filter->notif->requests);
+		wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
 	}
 	mutex_unlock(&filter->notify_lock);
 }
@@ -1561,7 +1605,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
 	knotif->error = resp.error;
 	knotif->val = resp.val;
 	knotif->flags = resp.flags;
-	complete(&knotif->ready);
+	if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		complete_on_current_cpu(&knotif->ready);
+	else
+		complete(&knotif->ready);
 out:
 	mutex_unlock(&filter->notify_lock);
 	return ret;
@@ -1591,6 +1638,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
 	return ret;
 }
 
+static long seccomp_notify_set_flags(struct seccomp_filter *filter,
+				    unsigned long flags)
+{
+	long ret;
+
+	if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&filter->notify_lock);
+	if (ret < 0)
+		return ret;
+	filter->notif->flags = flags;
+	mutex_unlock(&filter->notify_lock);
+	return 0;
+}
+
 static long seccomp_notify_addfd(struct seccomp_filter *filter,
 				 struct seccomp_notif_addfd __user *uaddfd,
 				 unsigned int size)
@@ -1720,6 +1783,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
 	case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
 	case SECCOMP_IOCTL_NOTIF_ID_VALID:
 		return seccomp_notify_id_valid(filter, buf);
+	case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
+		return seccomp_notify_set_flags(filter, arg);
 	}
 
 	/* Extensible Argument ioctls */
@@ -1777,7 +1842,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
 	if (!filter->notif)
 		goto out;
 
-	sema_init(&filter->notif->request, 0);
 	filter->notif->next_id = get_random_u64();
 	INIT_LIST_HEAD(&filter->notif->notifications);
 

tools/arch/x86/include/uapi/asm/unistd_32.h
@@ -26,3 +26,6 @@
 #ifndef __NR_setns
 #define __NR_setns 346
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 354
+#endif

tools/arch/x86/include/uapi/asm/unistd_64.h
@@ -26,3 +26,6 @@
 #ifndef __NR_getcpu
 #define __NR_getcpu 309
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 317
+#endif

tools/perf/bench/Build
@@ -1,5 +1,6 @@
 perf-y += sched-messaging.o
 perf-y += sched-pipe.o
+perf-y += sched-seccomp-notify.o
 perf-y += syscall.o
 perf-y += mem-functions.o
 perf-y += futex-hash.o

tools/perf/bench/bench.h
@@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
 int bench_numa(int argc, const char **argv);
 int bench_sched_messaging(int argc, const char **argv);
 int bench_sched_pipe(int argc, const char **argv);
+int bench_sched_seccomp_notify(int argc, const char **argv);
 int bench_syscall_basic(int argc, const char **argv);
 int bench_syscall_getpgid(int argc, const char **argv);
 int bench_syscall_fork(int argc, const char **argv);

tools/perf/bench/sched-seccomp-notify.c (new file, 178 lines)
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "bench.h"
+
+#include <uapi/linux/filter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/time64.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+
+#define LOOPS_DEFAULT 1000000UL
+static uint64_t loops = LOOPS_DEFAULT;
+static bool sync_mode;
+
+static const struct option options[] = {
+	OPT_U64('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_BOOLEAN('s', "sync-mode", &sync_mode,
+		    "Enable the synchronious mode for seccomp notifications"),
+	OPT_END()
+};
+
+static const char * const bench_seccomp_usage[] = {
+	"perf bench sched secccomp-notify <options>",
+	NULL
+};
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	return syscall(__NR_seccomp, op, flags, args);
+}
+
+static int user_notif_syscall(int nr, unsigned int flags)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+#define USER_NOTIF_MAGIC INT_MAX
+static void user_notification_sync_loop(int listener)
+{
+	struct seccomp_notif_resp resp;
+	struct seccomp_notif req;
+	uint64_t nr;
+
+	for (nr = 0; nr < loops; nr++) {
+		memset(&req, 0, sizeof(req));
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req))
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
+
+		if (req.data.nr != __NR_gettid)
+			errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr);
+
+		resp.id = req.id;
+		resp.error = 0;
+		resp.val = USER_NOTIF_MAGIC;
+		resp.flags = 0;
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp))
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
+	}
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+#endif
+int bench_sched_seccomp_notify(int argc, const char **argv)
+{
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);
+
+	gettimeofday(&start, NULL);
+
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	listener = user_notif_syscall(__NR_gettid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	if (listener < 0)
+		err(EXIT_FAILURE, "can't create a notification descriptor");
+
+	pid = fork();
+	if (pid < 0)
+		err(EXIT_FAILURE, "fork");
+	if (pid == 0) {
+		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
+			err(EXIT_FAILURE, "can't set the parent death signal");
+		while (1) {
+			ret = syscall(__NR_gettid);
+			if (ret == USER_NOTIF_MAGIC)
+				continue;
+			break;
+		}
+		_exit(1);
+	}
+
+	if (sync_mode) {
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			     SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0))
+			err(EXIT_FAILURE,
+			    "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");
+	}
+	user_notification_sync_loop(listener);
+
+	kill(pid, SIGKILL);
+	if (waitpid(pid, &status, 0) != pid)
+		err(EXIT_FAILURE, "waitpid(%d) failed", pid);
+	if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL)
+		errx(EXIT_FAILURE, "unexpected exit code: %d", status);
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %" PRIu64 " system calls\n\n",
+			loops);
+
+		result_usec = diff.tv_sec * USEC_PER_SEC;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)USEC_PER_SEC)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}

tools/perf/builtin-bench.c
@@ -47,6 +47,7 @@ static struct bench numa_benchmarks[] = {
 static struct bench sched_benchmarks[] = {
 	{ "messaging",	"Benchmark for scheduling and IPC",		bench_sched_messaging	},
 	{ "pipe",	"Benchmark for pipe() between two processes",	bench_sched_pipe	},
+	{ "seccomp-notify",	"Benchmark for seccomp user notify",	bench_sched_seccomp_notify},
 	{ "all",	"Run all scheduler benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}
 };

tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 
 TEST(negative_ENOSYS)
 {
+#if defined(__arm__)
+	SKIP(return, "arm32 does not support calling syscall -1");
+#endif
 	/*
 	 * There should be no difference between an "internal" skip
 	 * and userspace asking for syscall "-1".
@@ -3072,7 +3075,8 @@ TEST(syscall_restart)
 	timeout.tv_sec = 1;
 	errno = 0;
 	EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
-		TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+		TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+			errno, strerror(errno));
 	}
 
 	/* Read final sync from parent. */
@@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	if (__NR_clone3 < 0)
+		SKIP(return, "Test not built with clone3 support");
+
 	pid = sys_clone3(&args, sizeof(args));
 	ASSERT_GE(pid, 0);
 
@@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	if (__NR_clone3 < 0)
+		SKIP(return, "Test not built with clone3 support");
+
 	pid = sys_clone3(&args, sizeof(args));
 	ASSERT_GE(pid, 0);
 
@@ -4255,6 +4265,61 @@ TEST(user_notification_addfd_rlimit)
 	close(memfd);
 }
 
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+
+TEST(user_notification_sync)
+{
+	struct seccomp_notif req = {};
+	struct seccomp_notif_resp resp = {};
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	listener = user_notif_syscall(__NR_getppid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	ASSERT_GE(listener, 0);
+
+	/* Try to set invalid flags. */
+	EXPECT_SYSCALL_RETURN(-EINVAL,
+		ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		ret = syscall(__NR_getppid);
+		ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+			_exit(1);
+		}
+		_exit(0);
+	}
+
+	req.pid = 0;
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+	ASSERT_EQ(req.data.nr,  __NR_getppid);
+
+	resp.id = req.id;
+	resp.error = 0;
+	resp.val = USER_NOTIF_MAGIC;
+	resp.flags = 0;
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_EQ(status, 0);
+}
+
+
 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
 FIXTURE(O_SUSPEND_SECCOMP) {
 	pid_t pid;