MEDIUM: capabilities: enable support for Linux capabilities

For a while there has been the constraint of having to run as root for
transparent proxying, and we're starting to see some cases where QUIC is
not running in socket-per-connection mode due to the missing capability
that would be needed to bind a privileged port. It's not realistic to
ask all QUIC users on port 443 to run as root, so instead let's provide
a basic support for capabilities at least on linux. The ones currently
supported are cap_net_raw, cap_net_admin and cap_net_bind_service. The
mechanism was made OS-specific with a dedicated file because it really
is. It can be easily refined later for other OSes if needed.

A new keyword "setcap" is added to the global section, to enumerate the
capabilities that must be kept when switching from root to non-root. This
is ignored in other situations though. HAProxy has to be built with
USE_LINUX_CAP=1 for this to be supported, which is enabled by default
for linux-glibc, linux-glibc-legacy and linux-musl.

A good way to test this is to start haproxy with such a config:

    global
        uid 1000
        setcap cap_net_bind_service

    frontend test
        mode http
        timeout client 3s
        bind quic4@:443 ssl crt rsa+dh2048.pem allow-0rtt

and run it under "sudo strace -e trace=bind,setuid", then connecting
there from an H3 client. The bind() syscall must succeed despite the
user id having been switched.
This commit is contained in:
Willy Tarreau 2023-08-29 10:24:26 +02:00
parent 4d5f7d94b9
commit bd84387beb
5 changed files with 248 additions and 6 deletions

View File

@ -28,6 +28,7 @@
# USE_TPROXY : enable transparent proxy. Automatic. # USE_TPROXY : enable transparent proxy. Automatic.
# USE_LINUX_TPROXY : enable full transparent proxy. Automatic. # USE_LINUX_TPROXY : enable full transparent proxy. Automatic.
# USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic. # USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic.
# USE_LINUX_CAP : enable Linux capabilities.
# USE_LIBCRYPT : enable encrypted passwords using -lcrypt # USE_LIBCRYPT : enable encrypted passwords using -lcrypt
# USE_CRYPT_H : set it if your system requires including crypt.h # USE_CRYPT_H : set it if your system requires including crypt.h
# USE_GETADDRINFO : use getaddrinfo() to resolve IPv6 host names. # USE_GETADDRINFO : use getaddrinfo() to resolve IPv6 host names.
@ -305,7 +306,7 @@ LDFLAGS = $(ARCH_FLAGS) -g
# specific entries if present before them. # specific entries if present before them.
use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER USE_POLL \ use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER USE_POLL \
USE_THREAD USE_PTHREAD_EMULATION USE_BACKTRACE \ USE_THREAD USE_PTHREAD_EMULATION USE_BACKTRACE \
USE_TPROXY USE_LINUX_TPROXY \ USE_TPROXY USE_LINUX_TPROXY USE_LINUX_CAP \
USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_ENGINE \ USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_ENGINE \
USE_GETADDRINFO USE_OPENSSL USE_OPENSSL_WOLFSSL USE_SSL USE_LUA \ USE_GETADDRINFO USE_OPENSSL USE_OPENSSL_WOLFSSL USE_SSL USE_LUA \
USE_ACCEPT4 USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY \ USE_ACCEPT4 USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY \
@ -347,7 +348,7 @@ endif
ifeq ($(TARGET),linux-glibc) ifeq ($(TARGET),linux-glibc)
set_target_defaults = $(call default_opts, \ set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \ USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO USE_BACKTRACE USE_SHM_OPEN) USE_GETADDRINFO USE_BACKTRACE USE_SHM_OPEN)
INSTALL = install -v INSTALL = install -v
@ -357,7 +358,7 @@ endif
ifeq ($(TARGET),linux-glibc-legacy) ifeq ($(TARGET),linux-glibc-legacy)
set_target_defaults = $(call default_opts, \ set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \ USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO) USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO)
INSTALL = install -v INSTALL = install -v
endif endif
@ -366,7 +367,7 @@ endif
ifeq ($(TARGET),linux-musl) ifeq ($(TARGET),linux-musl)
set_target_defaults = $(call default_opts, \ set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \ USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO USE_SHM_OPEN) USE_GETADDRINFO USE_SHM_OPEN)
INSTALL = install -v INSTALL = install -v
@ -813,6 +814,10 @@ ifneq ($(USE_NS),)
OPTIONS_OBJS += src/namespace.o OPTIONS_OBJS += src/namespace.o
endif endif
ifneq ($(USE_LINUX_CAP),)
OPTIONS_OBJS += src/linuxcap.o
endif
ifneq ($(USE_OT),) ifneq ($(USE_OT),)
include addons/ot/Makefile include addons/ot/Makefile
endif endif

View File

@ -2183,6 +2183,22 @@ set-var-fmt <var-name> <fmt>
set-var-fmt proc.current_state "primary" set-var-fmt proc.current_state "primary"
set-var-fmt proc.bootid "%pid|%t" set-var-fmt proc.bootid "%pid|%t"
setcap <name>[,<name>...]
Sets a list of capabilities that must be preserved when starting with uid 0
and switching to a non-zero uid. By default all permissions are lost by the
uid switch, but some are often needed when trying to connect to a server from
a foreign address during transparent proxying, or when binding to a port
below 1024, e.g. when using "tune.quic.socket-owner connection", resulting in
setups running entirely under uid 0. Setting capabilities generally is a
safer alternative, as only the required capabilities will be preserved. The
feature is OS-specific and only enabled on Linux when USE_LINUX_CAP=1 is set
at build time. The list of supported capabilities also depends on the OS and
is enumerated by the error message displayed when an invalid capability name
or an empty one is passed. Multiple capabilities may be passed, delimited by
commas. Among those commonly used, "cap_net_raw" allows to transparently bind
to a foreign address, and "cap_net_bind_service" allows to bind to a
privileged port and may be used by QUIC.
setenv <name> <value> setenv <name> <value>
Sets environment variable <name> to value <value>. If the variable exists, it Sets environment variable <name> to value <value>. If the variable exists, it
is overwritten. The changes immediately take effect so that the next line in is overwritten. The changes immediately take effect so that the next line in
@ -3424,7 +3440,8 @@ tune.quic.socket-owner { listener | connection }
network stack. If your platform is deemed not compatible, haproxy will network stack. If your platform is deemed not compatible, haproxy will
automatically switch to "listener" mode on startup. Please note that QUIC automatically switch to "listener" mode on startup. Please note that QUIC
listeners running on privileged ports may require to run as uid 0, or some listeners running on privileged ports may require to run as uid 0, or some
OS-specific tuning to permit the target uid to bind such ports. OS-specific tuning to permit the target uid to bind such ports, such as
system capabilities. See also the "setcap" global directive.
The "listener" value indicates that QUIC transfers will occur on the shared The "listener" value indicates that QUIC transfers will occur on the shared
listener socket. This option can be a good compromise for small traffic as it listener socket. This option can be a good compromise for small traffic as it
@ -11645,7 +11662,8 @@ source <addr>[:<port>] [interface <name>]
is possible at the server level using the "source" server option. Refer to is possible at the server level using the "source" server option. Refer to
section 5 for more information. section 5 for more information.
In order to work, "usesrc" requires root privileges. In order to work, "usesrc" requires root privileges, or on supported systems,
the "cap_net_raw" capability. See also the "setcap" global directive.
Examples : Examples :
backend private backend private

View File

@ -0,0 +1,7 @@
#ifndef _HAPROXY_LINUXCAP_H
#define _HAPROXY_LINUXCAP_H
/* To be called right before setuid() when switching from <from_uid> to
 * <to_uid>, so that the capabilities configured with "setcap" can be kept.
 * Returns a negative value on failure, in which case the UID switch must not
 * be attempted.
 */
int prepare_caps_for_setuid(int from_uid, int to_uid);
/* To be called right after a successful setuid() with the same <from_uid> and
 * <to_uid> as above, to restrict the process to the configured capabilities
 * only. Returns a negative value on failure.
 */
int finalize_caps_after_setuid(int from_uid, int to_uid);
#endif /* _HAPROXY_LINUXCAP_H */

View File

@ -108,6 +108,9 @@
#include <haproxy/global.h> #include <haproxy/global.h>
#include <haproxy/hlua.h> #include <haproxy/hlua.h>
#include <haproxy/http_rules.h> #include <haproxy/http_rules.h>
#if defined(USE_LINUX_CAP)
#include <haproxy/linuxcap.h>
#endif
#include <haproxy/list.h> #include <haproxy/list.h>
#include <haproxy/listener.h> #include <haproxy/listener.h>
#include <haproxy/log.h> #include <haproxy/log.h>
@ -3184,6 +3187,8 @@ static void *run_thread_poll_loop(void *data)
/* set uid/gid depending on global settings */ /* set uid/gid depending on global settings */
static void set_identity(const char *program_name) static void set_identity(const char *program_name)
{ {
int from_uid __maybe_unused = geteuid();
if (global.gid) { if (global.gid) {
if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1) if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'" ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
@ -3196,11 +3201,27 @@ static void set_identity(const char *program_name)
} }
} }
#if defined(USE_LINUX_CAP)
if (prepare_caps_for_setuid(from_uid, global.uid) < 0) {
ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid);
protocol_unbind_all();
exit(1);
}
#endif
if (global.uid && setuid(global.uid) == -1) { if (global.uid && setuid(global.uid) == -1) {
ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid); ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid);
protocol_unbind_all(); protocol_unbind_all();
exit(1); exit(1);
} }
#if defined(USE_LINUX_CAP)
if (finalize_caps_after_setuid(from_uid, global.uid) < 0) {
ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid);
protocol_unbind_all();
exit(1);
}
#endif
} }
int main(int argc, char **argv) int main(int argc, char **argv)

191
src/linuxcap.c Normal file
View File

@ -0,0 +1,191 @@
/*
* Minimal handling of Linux kernel capabilities
*
* Copyright 2000-2023 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
/* Depending on distros, some have capset(), others use the more complicated
* libcap. Let's stick to what we need and the kernel documents (capset).
* Note that prctl is needed here.
*/
#include <linux/capability.h>
#include <sys/prctl.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/errors.h>
#include <haproxy/tools.h>
/* Table of the capabilities that may be designated by name. Each entry maps a
 * kernel capability number to its lowercase name; an entry is only compiled in
 * when the system headers define the corresponding macro. The table ends with
 * an all-zero entry, so walkers stop on the first entry whose <cap> is null.
 */
static const struct {
	int cap;
	const char *name;
} known_caps[] = {
#if defined(CAP_NET_RAW)
	{ .cap = CAP_NET_RAW,          .name = "cap_net_raw" },
#endif
#if defined(CAP_NET_ADMIN)
	{ .cap = CAP_NET_ADMIN,        .name = "cap_net_admin" },
#endif
#if defined(CAP_NET_BIND_SERVICE)
	{ .cap = CAP_NET_BIND_SERVICE, .name = "cap_net_bind_service" },
#endif
	/* end marker, must remain last */
	{ .cap = 0, .name = NULL }
};
/* Local replacement for the capset() wrapper that only some distros ship in
 * sys/capability.h: it forwards <hdrp> and <datap> unchanged to the raw
 * SYS_capset system call and hands back its result.
 */
static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap)
{
	int ret = syscall(SYS_capset, hdrp, datap);

	return ret;
}
/* Bitmask of the capabilities to keep after setuid(): bit <n> set means that
 * capability number <n> is retained. It defaults to zero, i.e. we don't keep
 * any cap after setuid(), and is only ever extended by the "setcap" global
 * keyword parser.
 */
static uint32_t caplist;
/* Arranges for the capabilities listed in <caplist> to survive the UID switch
 * from <from_uid> to <to_uid> that the caller is about to perform. The
 * sequence applied here is:
 *   1. enable PR_SET_KEEPCAPS to preserve caps across the final setuid() ;
 *   2. set the effective and permitted sets to the wanted caps + CAP_SETUID ;
 *   3. switch the effective uid to <to_uid> ;
 *   4. set the effective and permitted sets again.
 * After that, the caller can safely call setuid(). Nothing is done, and zero
 * is returned, unless the switch goes from uid 0 to a non-zero uid and at
 * least one capability was configured. Returns 0 on success, or -1 on failure
 * after an alert describing the failed step was emitted.
 */
int prepare_caps_for_setuid(int from_uid, int to_uid)
{
	struct __user_cap_header_struct hdr = {
		.version = _LINUX_CAPABILITY_VERSION_1,
		.pid = 0, /* current process */
	};
	struct __user_cap_data_struct data = { 0 };
	uint32_t wanted;

	/* only a root -> non-root switch with configured caps is of interest */
	if (from_uid != 0 || to_uid == 0 || caplist == 0)
		return 0;

	/* CAP_SETUID is kept for now since the caller still has to setuid() */
	wanted = caplist | (1U << CAP_SETUID);

	if (prctl(PR_SET_KEEPCAPS, 1) == -1) {
		ha_alert("Failed to preserve capabilities using prctl(): %s\n", strerror(errno));
		return -1;
	}

	data.permitted = wanted;
	data.effective = wanted;
	if (capset(&hdr, &data) == -1) {
		ha_alert("Failed to preset the capabilities to preserve using capset(): %s\n", strerror(errno));
		return -1;
	}

	if (seteuid(to_uid) == -1) {
		ha_alert("Failed to set effective uid to %d: %s\n", to_uid, strerror(errno));
		return -1;
	}

	/* same sets applied a second time, now under the new effective uid */
	data.permitted = wanted;
	data.effective = wanted;
	if (capset(&hdr, &data) == -1) {
		ha_alert("Failed to set the final capabilities using capset(): %s\n", strerror(errno));
		return -1;
	}

	return 0;
}
/* Completes the capability setup once setuid() has been performed. Its main
 * purpose is to drop the CAP_SETUID capability that was temporarily kept for
 * the switch, and which would otherwise allow to switch back to any UID and
 * recover everything. Only the capabilities from <caplist> remain in the
 * effective and permitted sets. Like the preparation step, it does nothing and
 * returns zero unless switching from uid 0 to a non-zero uid with at least one
 * configured capability. Returns 0 on success, or -1 after emitting an alert
 * on failure.
 */
int finalize_caps_after_setuid(int from_uid, int to_uid)
{
	struct __user_cap_header_struct hdr = {
		.version = _LINUX_CAPABILITY_VERSION_1,
		.pid = 0, /* current process */
	};
	struct __user_cap_data_struct data = { 0 };

	if (from_uid != 0 || to_uid == 0 || caplist == 0)
		return 0;

	data.permitted = caplist;
	data.effective = caplist;
	if (capset(&hdr, &data) != -1)
		return 0;

	ha_alert("Failed to drop the setuid capability using capset(): %s\n", strerror(errno));
	return -1;
}
/* Parses the "setcap" global keyword. <args[0]> is the keyword itself and
 * <args[1]> a comma-delimited list of capability names, which is split in
 * place. Every name must match an entry of <known_caps>; the matching bits are
 * then added to <caplist>, so the keyword may be repeated to accumulate
 * capabilities. A missing argument, an unknown name or an empty list element
 * (e.g. "a,,b" or a trailing comma) is rejected, and nothing is added to
 * <caplist> in this case. The other arguments are not used.
 *
 * Returns 0 on success, or -1 on failure with <err> filled with a message
 * naming the offending element and enumerating the supported capabilities.
 */
static int cfg_parse_global_setcap(char **args, int section_type,
                                   struct proxy *curpx, const struct proxy *defpx,
                                   const char *file, int line, char **err)
{
	char *name = args[1];
	char *next;
	uint32_t caps = 0;
	int id;

	if (!*name) {
		memprintf(err, "'%s' : missing capability name(s). ", args[0]);
		goto dump_caps;
	}

	/* <name> becomes NULL once the last element was processed. An empty
	 * element is not skipped: it fails the lookup below and is reported,
	 * instead of silently ending the parsing and dropping what follows.
	 */
	while (name) {
		next = strchr(name, ',');
		if (next)
			*(next++) = '\0';

		for (id = 0; known_caps[id].cap; id++) {
			if (strcmp(name, known_caps[id].name) == 0) {
				caps |= 1U << known_caps[id].cap;
				break;
			}
		}

		if (!known_caps[id].cap) {
			/* report the current element: args[1] was cut at the
			 * first comma and only holds the first name by now.
			 */
			memprintf(err, "'%s' : unsupported capability '%s'. ", args[0], name);
			goto dump_caps;
		}
		name = next;
	}

	/* only commit once the whole list was validated */
	caplist |= caps;
	return 0;

 dump_caps:
	/* append the supported names as "a, b and c." to the error message */
	memprintf(err, "%s Supported ones are: ", *err);

	for (id = 0; known_caps[id].cap; id++)
		memprintf(err, "%s%s%s%s", *err,
		          id ? known_caps[id+1].cap ? ", " : " and " : "",
		          known_caps[id].name, known_caps[id+1].cap ? "" : ".");
	return -1;
}
/* Configuration keywords provided by this file: only "setcap", accepted in the
 * global section and handled by cfg_parse_global_setcap().
 * NOTE(review): ILH is presumably the project's list-head initializer and the
 * all-zero entry the end-of-list marker -- confirm against cfgparse.h.
 */
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "setcap", cfg_parse_global_setcap },
{ 0, NULL, NULL }
}};
/* have cfg_register_keywords() called with the list above at the STG_REGISTER
 * init stage.
 */
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);