d9f21b3613
If more than 16000 inflight AF_UNIX sockets exist and the garbage collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc(). Also, they wait for unix_gc() to complete. In unix_gc(), all inflight AF_UNIX sockets are traversed at least once, and more if they are the GC candidate. Thus, sendmsg() significantly slows down with too many inflight AF_UNIX sockets. However, if a process sends data with no AF_UNIX FD, the sendmsg() call does not need to wait for GC. After this change, only the process that meets the condition below will be blocked under such a situation. 1) cmsg contains AF_UNIX socket 2) more than 32 AF_UNIX sent by the same user are still inflight Note that even a sendmsg() call that does not meet the condition but has AF_UNIX FD will be blocked later in unix_scm_to_skb() by the spinlock, but we allow that as a bonus for sane users. The results below are the time spent in unix_dgram_sendmsg() sending 1 byte of data with no FD 4096 times on a host where 32K inflight AF_UNIX sockets exist. Without series: the sane sendmsg() needs to wait gc unreasonably. $ sudo /usr/share/bcc/tools/funclatency -p 11165 unix_dgram_sendmsg Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. ^C nsecs : count distribution [...] 524288 -> 1048575 : 0 | | 1048576 -> 2097151 : 3881 |****************************************| 2097152 -> 4194303 : 214 |** | 4194304 -> 8388607 : 1 | | avg = 1825567 nsecs, total: 7477526027 nsecs, count: 4096 With series: the sane sendmsg() can finish much faster. $ sudo /usr/share/bcc/tools/funclatency -p 8702 unix_dgram_sendmsg Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. ^C nsecs : count distribution [...] 128 -> 255 : 0 | | 256 -> 511 : 4092 |****************************************| 512 -> 1023 : 2 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 0 | | 4096 -> 8191 : 1 | | 8192 -> 16383 : 1 | | avg = 410 nsecs, total: 1680510 nsecs, count: 4096 Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://lore.kernel.org/r/20240123170856.41348-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
223 lines
5.2 KiB
C
223 lines
5.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_NET_SCM_H
|
|
#define __LINUX_NET_SCM_H
|
|
|
|
#include <linux/limits.h>
|
|
#include <linux/net.h>
|
|
#include <linux/cred.h>
|
|
#include <linux/file.h>
|
|
#include <linux/security.h>
|
|
#include <linux/pid.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <net/compat.h>
|
|
|
|
/* Well, we should have at least one descriptor open
|
|
* to accept passed FDs 8)
|
|
*/
|
|
#define SCM_MAX_FD 253
|
|
|
|
struct scm_creds {
|
|
u32 pid;
|
|
kuid_t uid;
|
|
kgid_t gid;
|
|
};
|
|
|
|
struct scm_fp_list {
|
|
short count;
|
|
short count_unix;
|
|
short max;
|
|
struct user_struct *user;
|
|
struct file *fp[SCM_MAX_FD];
|
|
};
|
|
|
|
struct scm_cookie {
|
|
struct pid *pid; /* Skb credentials */
|
|
struct scm_fp_list *fp; /* Passed files */
|
|
struct scm_creds creds; /* Skb credentials */
|
|
#ifdef CONFIG_SECURITY_NETWORK
|
|
u32 secid; /* Passed security ID */
|
|
#endif
|
|
};
|
|
|
|
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm);
|
|
void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm);
|
|
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm);
|
|
void __scm_destroy(struct scm_cookie *scm);
|
|
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl);
|
|
|
|
#ifdef CONFIG_SECURITY_NETWORK
|
|
static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm)
|
|
{
|
|
security_socket_getpeersec_dgram(sock, NULL, &scm->secid);
|
|
}
|
|
#else
|
|
static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm)
|
|
{ }
|
|
#endif /* CONFIG_SECURITY_NETWORK */
|
|
|
|
static __inline__ void scm_set_cred(struct scm_cookie *scm,
|
|
struct pid *pid, kuid_t uid, kgid_t gid)
|
|
{
|
|
scm->pid = get_pid(pid);
|
|
scm->creds.pid = pid_vnr(pid);
|
|
scm->creds.uid = uid;
|
|
scm->creds.gid = gid;
|
|
}
|
|
|
|
static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
|
|
{
|
|
put_pid(scm->pid);
|
|
scm->pid = NULL;
|
|
}
|
|
|
|
static __inline__ void scm_destroy(struct scm_cookie *scm)
|
|
{
|
|
scm_destroy_cred(scm);
|
|
if (scm->fp)
|
|
__scm_destroy(scm);
|
|
}
|
|
|
|
static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
|
|
struct scm_cookie *scm, bool forcecreds)
|
|
{
|
|
memset(scm, 0, sizeof(*scm));
|
|
scm->creds.uid = INVALID_UID;
|
|
scm->creds.gid = INVALID_GID;
|
|
if (forcecreds)
|
|
scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
|
|
unix_get_peersec_dgram(sock, scm);
|
|
if (msg->msg_controllen <= 0)
|
|
return 0;
|
|
return __scm_send(sock, msg, scm);
|
|
}
|
|
|
|
#ifdef CONFIG_SECURITY_NETWORK
|
|
static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
|
|
{
|
|
char *secdata;
|
|
u32 seclen;
|
|
int err;
|
|
|
|
if (test_bit(SOCK_PASSSEC, &sock->flags)) {
|
|
err = security_secid_to_secctx(scm->secid, &secdata, &seclen);
|
|
|
|
if (!err) {
|
|
put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata);
|
|
security_release_secctx(secdata, seclen);
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline bool scm_has_secdata(struct socket *sock)
|
|
{
|
|
return test_bit(SOCK_PASSSEC, &sock->flags);
|
|
}
|
|
#else
|
|
static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
|
|
{ }
|
|
|
|
static inline bool scm_has_secdata(struct socket *sock)
|
|
{
|
|
return false;
|
|
}
|
|
#endif /* CONFIG_SECURITY_NETWORK */
|
|
|
|
static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
|
|
{
|
|
struct file *pidfd_file = NULL;
|
|
int len, pidfd;
|
|
|
|
/* put_cmsg() doesn't return an error if CMSG is truncated,
|
|
* that's why we need to opencode these checks here.
|
|
*/
|
|
if (msg->msg_flags & MSG_CMSG_COMPAT)
|
|
len = sizeof(struct compat_cmsghdr) + sizeof(int);
|
|
else
|
|
len = sizeof(struct cmsghdr) + sizeof(int);
|
|
|
|
if (msg->msg_controllen < len) {
|
|
msg->msg_flags |= MSG_CTRUNC;
|
|
return;
|
|
}
|
|
|
|
if (!scm->pid)
|
|
return;
|
|
|
|
pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
|
|
|
|
if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
|
|
if (pidfd_file) {
|
|
put_unused_fd(pidfd);
|
|
fput(pidfd_file);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (pidfd_file)
|
|
fd_install(pidfd, pidfd_file);
|
|
}
|
|
|
|
static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg,
|
|
struct scm_cookie *scm, int flags)
|
|
{
|
|
if (!msg->msg_control) {
|
|
if (test_bit(SOCK_PASSCRED, &sock->flags) ||
|
|
test_bit(SOCK_PASSPIDFD, &sock->flags) ||
|
|
scm->fp || scm_has_secdata(sock))
|
|
msg->msg_flags |= MSG_CTRUNC;
|
|
scm_destroy(scm);
|
|
return false;
|
|
}
|
|
|
|
if (test_bit(SOCK_PASSCRED, &sock->flags)) {
|
|
struct user_namespace *current_ns = current_user_ns();
|
|
struct ucred ucreds = {
|
|
.pid = scm->creds.pid,
|
|
.uid = from_kuid_munged(current_ns, scm->creds.uid),
|
|
.gid = from_kgid_munged(current_ns, scm->creds.gid),
|
|
};
|
|
put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
|
|
}
|
|
|
|
scm_passec(sock, msg, scm);
|
|
|
|
if (scm->fp)
|
|
scm_detach_fds(msg, scm);
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline void scm_recv(struct socket *sock, struct msghdr *msg,
|
|
struct scm_cookie *scm, int flags)
|
|
{
|
|
if (!__scm_recv_common(sock, msg, scm, flags))
|
|
return;
|
|
|
|
scm_destroy_cred(scm);
|
|
}
|
|
|
|
static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
|
|
struct scm_cookie *scm, int flags)
|
|
{
|
|
if (!__scm_recv_common(sock, msg, scm, flags))
|
|
return;
|
|
|
|
if (test_bit(SOCK_PASSPIDFD, &sock->flags))
|
|
scm_pidfd_recv(msg, scm);
|
|
|
|
scm_destroy_cred(scm);
|
|
}
|
|
|
|
static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
|
|
unsigned int flags)
|
|
{
|
|
if (!ufd)
|
|
return -EFAULT;
|
|
return receive_fd(f, ufd, flags);
|
|
}
|
|
|
|
#endif /* __LINUX_NET_SCM_H */
|
|
|