MEDIUM: capabilities: enable support for Linux capabilities
For a while there has been the constraint of having to run as root for transparent proxying, and we're starting to see some cases where QUIC is not running in socket-per-connection mode due to the missing capability that would be needed to bind a privileged port. It's not realistic to ask all QUIC users on port 443 to run as root, so instead let's provide basic support for capabilities, at least on Linux. The ones currently supported are cap_net_raw, cap_net_admin and cap_net_bind_service. The mechanism was made OS-specific with a dedicated file because it really is. It can be easily refined later for other OSes if needed. A new keyword "setcap" is added to the global section, to enumerate the capabilities that must be kept when switching from root to non-root. This is ignored in other situations though. HAProxy has to be built with USE_LINUX_CAP=1 for this to be supported, which is enabled by default for linux-glibc, linux-glibc-legacy and linux-musl. A good way to test this is to start haproxy with such a config: global uid 1000 setcap cap_net_bind_service frontend test mode http timeout client 3s bind quic4@:443 ssl crt rsa+dh2048.pem allow-0rtt and run it under "sudo strace -e trace=bind,setuid", then connect to it from an H3 client. The bind() syscall must succeed despite the user id having been switched.
This commit is contained in:
parent
4d5f7d94b9
commit
bd84387beb
13
Makefile
13
Makefile
@ -28,6 +28,7 @@
|
||||
# USE_TPROXY : enable transparent proxy. Automatic.
|
||||
# USE_LINUX_TPROXY : enable full transparent proxy. Automatic.
|
||||
# USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic.
|
||||
# USE_LINUX_CAP : enable Linux capabilities.
|
||||
# USE_LIBCRYPT : enable encrypted passwords using -lcrypt
|
||||
# USE_CRYPT_H : set it if your system requires including crypt.h
|
||||
# USE_GETADDRINFO : use getaddrinfo() to resolve IPv6 host names.
|
||||
@ -305,7 +306,7 @@ LDFLAGS = $(ARCH_FLAGS) -g
|
||||
# specific entries if present before them.
|
||||
use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER USE_POLL \
|
||||
USE_THREAD USE_PTHREAD_EMULATION USE_BACKTRACE \
|
||||
USE_TPROXY USE_LINUX_TPROXY \
|
||||
USE_TPROXY USE_LINUX_TPROXY USE_LINUX_CAP \
|
||||
USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_ENGINE \
|
||||
USE_GETADDRINFO USE_OPENSSL USE_OPENSSL_WOLFSSL USE_SSL USE_LUA \
|
||||
USE_ACCEPT4 USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY \
|
||||
@ -347,7 +348,7 @@ endif
|
||||
ifeq ($(TARGET),linux-glibc)
|
||||
set_target_defaults = $(call default_opts, \
|
||||
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \
|
||||
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
|
||||
USE_GETADDRINFO USE_BACKTRACE USE_SHM_OPEN)
|
||||
INSTALL = install -v
|
||||
@ -357,7 +358,7 @@ endif
|
||||
ifeq ($(TARGET),linux-glibc-legacy)
|
||||
set_target_defaults = $(call default_opts, \
|
||||
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \
|
||||
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO)
|
||||
INSTALL = install -v
|
||||
endif
|
||||
@ -366,7 +367,7 @@ endif
|
||||
ifeq ($(TARGET),linux-musl)
|
||||
set_target_defaults = $(call default_opts, \
|
||||
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \
|
||||
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
|
||||
USE_GETADDRINFO USE_SHM_OPEN)
|
||||
INSTALL = install -v
|
||||
@ -813,6 +814,10 @@ ifneq ($(USE_NS),)
|
||||
OPTIONS_OBJS += src/namespace.o
|
||||
endif
|
||||
|
||||
ifneq ($(USE_LINUX_CAP),)
|
||||
OPTIONS_OBJS += src/linuxcap.o
|
||||
endif
|
||||
|
||||
ifneq ($(USE_OT),)
|
||||
include addons/ot/Makefile
|
||||
endif
|
||||
|
@ -2183,6 +2183,22 @@ set-var-fmt <var-name> <fmt>
|
||||
set-var-fmt proc.current_state "primary"
|
||||
set-var-fmt proc.bootid "%pid|%t"
|
||||
|
||||
setcap <name>[,<name>...]
|
||||
Sets a list of capabilities that must be preserved when starting with uid 0
|
||||
and switching to a non-zero uid. By default all permissions are lost by the
|
||||
uid switch, but some are often needed when trying to connect to a server from
|
||||
a foreign address during transparent proxying, or when binding to a port
|
||||
below 1024, e.g. when using "tune.quic.socket-owner connection", resulting in
|
||||
setups running entirely under uid 0. Setting capabilities generally is a
|
||||
safer alternative, as only the required capabilities will be preserved. The
|
||||
feature is OS-specific and only enabled on Linux when USE_LINUX_CAP=1 is set
|
||||
at build time. The list of supported capabilities also depends on the OS and
|
||||
is enumerated by the error message displayed when an invalid capability name
|
||||
or an empty one is passed. Multiple capabilities may be passed, delimited by
|
||||
commas. Among those commonly used, "cap_net_raw" allows transparent binding
|
||||
to a foreign address, and "cap_net_bind_service" allows binding to a
|
||||
privileged port and may be used by QUIC.
|
||||
|
||||
setenv <name> <value>
|
||||
Sets environment variable <name> to value <value>. If the variable exists, it
|
||||
is overwritten. The changes immediately take effect so that the next line in
|
||||
@ -3424,7 +3440,8 @@ tune.quic.socket-owner { listener | connection }
|
||||
network stack. If your platform is deemed not compatible, haproxy will
|
||||
automatically switch to "listener" mode on startup. Please note that QUIC
|
||||
listeners running on privileged ports may require to run as uid 0, or some
|
||||
OS-specific tuning to permit the target uid to bind such ports.
|
||||
OS-specific tuning to permit the target uid to bind such ports, such as
|
||||
system capabilities. See also the "setcap" global directive.
|
||||
|
||||
The "listener" value indicates that QUIC transfers will occur on the shared
|
||||
listener socket. This option can be a good compromise for small traffic as it
|
||||
@ -11645,7 +11662,8 @@ source <addr>[:<port>] [interface <name>]
|
||||
is possible at the server level using the "source" server option. Refer to
|
||||
section 5 for more information.
|
||||
|
||||
In order to work, "usesrc" requires root privileges.
|
||||
In order to work, "usesrc" requires root privileges, or on supported systems,
|
||||
the "cap_net_raw" capability. See also the "setcap" global directive.
|
||||
|
||||
Examples :
|
||||
backend private
|
||||
|
7
include/haproxy/linuxcap.h
Normal file
7
include/haproxy/linuxcap.h
Normal file
@ -0,0 +1,7 @@
|
||||
#ifndef _HAPROXY_LINUXCAP_H
|
||||
#define _HAPROXY_LINUXCAP_H
|
||||
|
||||
/* To be called before setuid() when switching from <from_uid> to <to_uid>.
 * Returns >=0 on success, negative on failure.
 */
int prepare_caps_for_setuid(int from_uid, int to_uid);
|
||||
/* To be called after setuid() with the same <from_uid> and <to_uid>.
 * Returns 0 on success, negative on failure.
 */
int finalize_caps_after_setuid(int from_uid, int to_uid);
|
||||
|
||||
#endif /* _HAPROXY_LINUXCAP_H */
|
@ -108,6 +108,9 @@
|
||||
#include <haproxy/global.h>
|
||||
#include <haproxy/hlua.h>
|
||||
#include <haproxy/http_rules.h>
|
||||
#if defined(USE_LINUX_CAP)
|
||||
#include <haproxy/linuxcap.h>
|
||||
#endif
|
||||
#include <haproxy/list.h>
|
||||
#include <haproxy/listener.h>
|
||||
#include <haproxy/log.h>
|
||||
@ -3184,6 +3187,8 @@ static void *run_thread_poll_loop(void *data)
|
||||
/* set uid/gid depending on global settings */
|
||||
static void set_identity(const char *program_name)
|
||||
{
|
||||
int from_uid __maybe_unused = geteuid();
|
||||
|
||||
if (global.gid) {
|
||||
if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
|
||||
ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
|
||||
@ -3196,11 +3201,27 @@ static void set_identity(const char *program_name)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(USE_LINUX_CAP)
|
||||
if (prepare_caps_for_setuid(from_uid, global.uid) < 0) {
|
||||
ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid);
|
||||
protocol_unbind_all();
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (global.uid && setuid(global.uid) == -1) {
|
||||
ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid);
|
||||
protocol_unbind_all();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#if defined(USE_LINUX_CAP)
|
||||
if (finalize_caps_after_setuid(from_uid, global.uid) < 0) {
|
||||
ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid);
|
||||
protocol_unbind_all();
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
191
src/linuxcap.c
Normal file
191
src/linuxcap.c
Normal file
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Minimal handling of Linux kernel capabilities
|
||||
*
|
||||
* Copyright 2000-2023 Willy Tarreau <w@1wt.eu>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Depending on distros, some have capset(), others use the more complicated
|
||||
* libcap. Let's stick to what we need and the kernel documents (capset).
|
||||
* Note that prctl is needed here.
|
||||
*/
|
||||
#include <linux/capability.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <syscall.h>
|
||||
|
||||
#include <haproxy/api.h>
|
||||
#include <haproxy/cfgparse.h>
|
||||
#include <haproxy/errors.h>
|
||||
#include <haproxy/tools.h>
|
||||
|
||||
/* Table associating each supported capability number with the name used in
 * the configuration. An entry is only present when the matching CAP_* macro
 * is defined at build time. The table ends with an all-zero entry: the loops
 * walking it stop on the first entry whose <cap> field is zero.
 */
static const struct {
	int cap;
	const char *name;
} known_caps[] = {
#ifdef CAP_NET_RAW
	{ .cap = CAP_NET_RAW,          .name = "cap_net_raw" },
#endif
#ifdef CAP_NET_ADMIN
	{ .cap = CAP_NET_ADMIN,        .name = "cap_net_admin" },
#endif
#ifdef CAP_NET_BIND_SERVICE
	{ .cap = CAP_NET_BIND_SERVICE, .name = "cap_net_bind_service" },
#endif
	/* terminator, must remain the last entry */
	{ .cap = 0, .name = NULL }
};
|
||||
|
||||
/* Local wrapper issuing the capset system call directly through syscall(),
 * since only some distros provide it (via sys/capability.h). <hdrp> and
 * <datap> are passed untouched, and the value reported by syscall() is
 * returned as-is.
 */
static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap)
{
	long ret;

	ret = syscall(SYS_capset, hdrp, datap);
	return ret;
}
|
||||
|
||||
/* Bit mask of the capabilities to keep after setuid(), with one bit set per
 * capability number (1 << CAP_xxx), as filled by the "setcap" keyword parser.
 * It defaults to zero, i.e. we don't keep any cap after setuid().
 */
|
||||
static uint32_t caplist;
|
||||
|
||||
/* try to apply capabilities before switching UID from <from_uid> to <to_uid>.
|
||||
* In practice we need to do this in 4 steps:
|
||||
* - set PR_SET_KEEPCAPS to preserve caps across the final setuid()
|
||||
* - set the effective and permitted caps ;
|
||||
* - switch euid to non-zero
|
||||
* - set the effective and permitted caps again
|
||||
* - then the caller can safely call setuid()
|
||||
* We don't do this if the current euid is not zero or if the target uid
|
||||
* is zero. Returns >=0 on success, negative on failure. Alerts or warnings
|
||||
* may be emitted.
|
||||
*/
|
||||
int prepare_caps_for_setuid(int from_uid, int to_uid)
|
||||
{
|
||||
struct __user_cap_data_struct cap_data = { };
|
||||
struct __user_cap_header_struct cap_hdr = {
|
||||
.pid = 0, /* current process */
|
||||
.version = _LINUX_CAPABILITY_VERSION_1,
|
||||
};
|
||||
|
||||
if (from_uid != 0)
|
||||
return 0;
|
||||
|
||||
if (!to_uid)
|
||||
return 0;
|
||||
|
||||
if (!caplist)
|
||||
return 0;
|
||||
|
||||
if (prctl(PR_SET_KEEPCAPS, 1) == -1) {
|
||||
ha_alert("Failed to preserve capabilities using prctl(): %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
cap_data.effective = cap_data.permitted = caplist | (1 << CAP_SETUID);
|
||||
if (capset(&cap_hdr, &cap_data) == -1) {
|
||||
ha_alert("Failed to preset the capabilities to preserve using capset(): %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (seteuid(to_uid) == -1) {
|
||||
ha_alert("Failed to set effective uid to %d: %s\n", to_uid, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
cap_data.effective = cap_data.permitted = caplist | (1 << CAP_SETUID);
|
||||
if (capset(&cap_hdr, &cap_data) == -1) {
|
||||
ha_alert("Failed to set the final capabilities using capset(): %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
/* all's good */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* finalize the capabilities after setuid(). The most important is to drop the
|
||||
* CAP_SET_SETUID capability, which would otherwise allow to switch back to any
|
||||
* UID and recover everything.
|
||||
*/
|
||||
int finalize_caps_after_setuid(int from_uid, int to_uid)
|
||||
{
|
||||
struct __user_cap_data_struct cap_data = { };
|
||||
struct __user_cap_header_struct cap_hdr = {
|
||||
.pid = 0, /* current process */
|
||||
.version = _LINUX_CAPABILITY_VERSION_1,
|
||||
};
|
||||
|
||||
if (from_uid != 0)
|
||||
return 0;
|
||||
|
||||
if (!to_uid)
|
||||
return 0;
|
||||
|
||||
if (!caplist)
|
||||
return 0;
|
||||
|
||||
cap_data.effective = cap_data.permitted = caplist;
|
||||
if (capset(&cap_hdr, &cap_data) == -1) {
|
||||
ha_alert("Failed to drop the setuid capability using capset(): %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
/* all's good */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* parse the "setcap" global keyword. Returns -1 on failure, 0 on success. */
|
||||
static int cfg_parse_global_setcap(char **args, int section_type,
|
||||
struct proxy *curpx, const struct proxy *defpx,
|
||||
const char *file, int line, char **err)
|
||||
{
|
||||
char *name = args[1];
|
||||
char *next;
|
||||
uint32_t caps = 0;
|
||||
int id;
|
||||
|
||||
if (!*name) {
|
||||
memprintf(err, "'%s' : missing capability name(s). ", args[0]);
|
||||
goto dump_caps;
|
||||
}
|
||||
|
||||
while (name && *name) {
|
||||
next = strchr(name, ',');
|
||||
if (next)
|
||||
*(next++) = '\0';
|
||||
|
||||
for (id = 0; known_caps[id].cap; id++) {
|
||||
if (strcmp(name, known_caps[id].name) == 0) {
|
||||
caps |= 1U << known_caps[id].cap;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!known_caps[id].cap) {
|
||||
memprintf(err, "'%s' : unsupported capability '%s'. ", args[0], args[1]);
|
||||
goto dump_caps;
|
||||
}
|
||||
name = next;
|
||||
}
|
||||
|
||||
caplist |= caps;
|
||||
return 0;
|
||||
|
||||
|
||||
dump_caps:
|
||||
memprintf(err, "%s Supported ones are: ", *err);
|
||||
|
||||
for (id = 0; known_caps[id].cap; id++)
|
||||
memprintf(err, "%s%s%s%s", *err,
|
||||
id ? known_caps[id+1].cap ? ", " : " and " : "",
|
||||
known_caps[id].name, known_caps[id+1].cap ? "" : ".");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Configuration keywords provided by this file: only "setcap", which is
 * attached to the global section and handled by cfg_parse_global_setcap().
 * The list ends with an all-zero entry.
 */
static struct cfg_kw_list cfg_kws = {ILH, {
|
||||
{ CFG_GLOBAL, "setcap", cfg_parse_global_setcap },
|
||||
{ 0, NULL, NULL }
|
||||
}};
|
||||
|
||||
/* passes the keyword list above to cfg_register_keywords() at the
 * STG_REGISTER init stage
 */
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
|
Loading…
Reference in New Issue
Block a user