rlimit-util: add pid_getrlimit() helper

This gets the resource limits of a specified process, and is very similar to prlimit() with a NULL new_limit argument. In fact, it tries that first. However, it then falls back to reading /proc/$PID/limits. Why? Simply because Linux prohibits access to prlimit() for processes with a different UID, but /proc/$PID/limits still works. This is preparation to allow nspawn to run unprivileged.
2024-12-25 01:34:28 +03:00 · 2023-12-08 10:25:57 +01:00 · 2023-12-08 10:25:57 +01:00 · 21c43631d7
commit 21c43631d7
parent 1845fccada
4 changed files with 168 additions and 3 deletions
--- a/src/basic/rlimit-util.c
+++ b/src/basic/rlimit-util.c
@ -6,11 +6,14 @@
 #include "errno-util.h"
 #include "extract-word.h"
 #include "fd-util.h"
+#include "fileio.h"
 #include "format-util.h"
 #include "macro.h"
 #include "missing_resource.h"
+#include "process-util.h"
 #include "rlimit-util.h"
 #include "string-table.h"
+#include "strv.h"
 #include "time-util.h"

 int setrlimit_closest(int resource, const struct rlimit *rlim) {
@ -426,3 +429,116 @@ int rlimit_nofile_safe(void) {

        return 1;
 }
+
+int pid_getrlimit(pid_t pid, int resource, struct rlimit *ret) {
+        /* Queries the soft and hard limit of 'resource' for process 'pid' and stores them in *ret.
+         *
+         * For pid 0 or our own PID this is plain getrlimit(). For any other process prlimit() is tried
+         * first; only if that fails with a privilege error do we fall back to parsing the process'
+         * "limits" file in procfs.
+         *
+         * Failure is reported as a negative errno-style value: -EPERM if the limits file cannot be read,
+         * -ENOMEM on allocation failure, the safe_atou64() error if a limit field does not parse, and
+         * -ENOTRECOVERABLE if no usable line for the resource is found. The procfs path returns 0 on
+         * success.
+         *
+         * The table below maps each resource to the label its line begins with in the limits file. */
+        static const char * const prefix_table[_RLIMIT_MAX] = {
+                [RLIMIT_CPU]        = "Max cpu time",
+                [RLIMIT_FSIZE]      = "Max file size",
+                [RLIMIT_DATA]       = "Max data size",
+                [RLIMIT_STACK]      = "Max stack size",
+                [RLIMIT_CORE]       = "Max core file size",
+                [RLIMIT_RSS]        = "Max resident set",
+                [RLIMIT_NPROC]      = "Max processes",
+                [RLIMIT_NOFILE]     = "Max open files",
+                [RLIMIT_MEMLOCK]    = "Max locked memory",
+                [RLIMIT_AS]         = "Max address space",
+                [RLIMIT_LOCKS]      = "Max file locks",
+                [RLIMIT_SIGPENDING] = "Max pending signals",
+                [RLIMIT_MSGQUEUE]   = "Max msgqueue size",
+                [RLIMIT_NICE]       = "Max nice priority",
+                [RLIMIT_RTPRIO]     = "Max realtime priority",
+                [RLIMIT_RTTIME]     = "Max realtime timeout",
+        };
+
+        int r;
+
+        assert(resource >= 0);
+        assert(resource < _RLIMIT_MAX);
+        assert(pid >= 0);
+        assert(ret);
+        /* Our own process: neither prlimit() nor procfs is needed */
+        if (pid == 0 || pid == getpid_cached())
+                return RET_NERRNO(getrlimit(resource, ret));
+        /* Any outcome other than a privilege error (success included) is handed back to the caller as is */
+        r = RET_NERRNO(prlimit(pid, resource, /* new_limit= */ NULL, ret));
+        if (!ERRNO_IS_NEG_PRIVILEGE(r))
+                return r;
+
+        /* We don't have access? Then try to go via /proc/$PID/limits. Weirdly that's world readable in
+         * contrast to querying the data via prlimit() */
+
+        const char *p = procfs_file_alloca(pid, "limits");
+        _cleanup_free_ char *limits = NULL;
+
+        r = read_full_virtual_file(p, &limits, NULL);
+        if (r < 0)
+                return -EPERM; /* propagate original permission error if we can't access the limits file */
+        /* Split into lines; the loop below starts at the second entry via strv_skip(l, 1)
+         * (presumably the first line is the column header -- confirm against the procfs documentation) */
+        _cleanup_strv_free_ char **l = NULL;
+        l = strv_split(limits, "\n");
+        if (!l)
+                return -ENOMEM;
+
+        STRV_FOREACH(i, strv_skip(l, 1)) {
+                _cleanup_free_ char *soft = NULL, *hard = NULL;
+                uint64_t sv, hv;
+                const char *e;
+                /* Only the line that begins with this resource's label, followed by a space, is of interest.
+                 * Lines that match the label but lack one of the two fields are skipped, not treated as errors. */
+                e = startswith(*i, prefix_table[resource]);
+                if (!e)
+                        continue;
+
+                if (*e != ' ')
+                        continue;
+                /* First whitespace-delimited field after the label: the soft limit */
+                e += strspn(e, WHITESPACE);
+
+                size_t n;
+                n = strcspn(e, WHITESPACE);
+                if (n == 0)
+                        continue;
+
+                soft = strndup(e, n);
+                if (!soft)
+                        return -ENOMEM;
+
+                e += n;
+                if (*e != ' ')
+                        continue;
+                /* Second field: the hard limit */
+                e += strspn(e, WHITESPACE);
+                n = strcspn(e, WHITESPACE);
+                if (n == 0)
+                        continue;
+
+                hard = strndup(e, n);
+                if (!hard)
+                        return -ENOMEM;
+                /* The literal "unlimited" maps to RLIM_INFINITY; anything else must parse via safe_atou64() */
+                if (streq(soft, "unlimited"))
+                        sv = RLIM_INFINITY;
+                else {
+                        r = safe_atou64(soft, &sv);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (streq(hard, "unlimited"))
+                        hv = RLIM_INFINITY;
+                else {
+                        r = safe_atou64(hard, &hv);
+                        if (r < 0)
+                                return r;
+                }
+
+                *ret = (struct rlimit) {
+                        .rlim_cur = sv,
+                        .rlim_max = hv,
+                };
+
+                return 0;
+        }
+        /* No usable line for the requested resource was found in the file */
+        return -ENOTRECOVERABLE;
+}
--- a/src/basic/rlimit-util.h
+++ b/src/basic/rlimit-util.h
@ -25,3 +25,5 @@ void rlimit_free_all(struct rlimit **rl);

 int rlimit_nofile_bump(int limit);
 int rlimit_nofile_safe(void);
+/* Reads the limits of 'resource' for process 'pid' into *ret; returns a negative errno-style value on failure. */
+int pid_getrlimit(pid_t pid, int resource, struct rlimit *ret);
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@ -5286,7 +5286,7 @@ static int initialize_rlimits(void) {
                 * don't read the other limits from PID 1 but prefer the static table above. */
        };

-        int rl;
+        int rl, r;

        for (rl = 0; rl < _RLIMIT_MAX; rl++) {
                /* Let's only fill in what the user hasn't explicitly configured anyway */
@ -5297,8 +5297,9 @@ static int initialize_rlimits(void) {
                        if (IN_SET(rl, RLIMIT_NPROC, RLIMIT_SIGPENDING)) {
                                /* For these two let's read the limits off PID 1. See above for an explanation. */

-                                if (prlimit(1, rl, NULL, &buffer) < 0)
-                                        return log_error_errno(errno, "Failed to read resource limit RLIMIT_%s of PID 1: %m", rlimit_to_string(rl));
+                                r = pid_getrlimit(1, rl, &buffer);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to read resource limit RLIMIT_%s of PID 1: %m", rlimit_to_string(rl));

                                v = &buffer;
                        } else if (rl == RLIMIT_NOFILE) {
--- a/src/test/test-rlimit-util.c
+++ b/src/test/test-rlimit-util.c
@ -1,15 +1,20 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */

 #include <sys/resource.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif

 #include "alloc-util.h"
 #include "capability-util.h"
 #include "macro.h"
 #include "missing_resource.h"
+#include "process-util.h"
 #include "rlimit-util.h"
 #include "string-util.h"
 #include "tests.h"
 #include "time-util.h"
+#include "user-util.h"

 static void test_rlimit_parse_format_one(int resource, const char *string, rlim_t soft, rlim_t hard, int ret, const char *formatted) {
        _cleanup_free_ char *f = NULL;
@ -136,4 +141,45 @@ TEST(setrlimit) {
        assert_se(old.rlim_max == new.rlim_max);
 }

+TEST(pid_getrlimit) {
+        int r;
+
+        /* We fork off a child and read the parent's resource limit from there (i.e. our own), and compare
+         * with what getrlimit() gives us. This is repeated for every resource below _RLIMIT_MAX. */
+
+        for (int resource = 0; resource < _RLIMIT_MAX; resource++) {
+                struct rlimit direct;
+
+                assert_se(getrlimit(resource, &direct) >= 0);
+
+                /* We fork off a child so that the PID we query (the parent's) differs from the caller's own
+                 * PID, i.e. pid_getrlimit() cannot simply take its getrlimit() shortcut.
+                 * NOTE(review): FORK_WAIT presumably makes safe_fork() wait for the child in the parent
+                 * before returning -- confirm against process-util. */
+                r = safe_fork("(getrlimit)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_LOG|FORK_WAIT, /* ret_pid= */ NULL);
+                assert_se(r >= 0);
+
+                if (r == 0) {
+                        struct rlimit indirect;
+                        /* child */
+
+                        /* Drop privs, so that prlimit() doesn't work anymore. The return values are
+                         * deliberately ignored, so this is best effort only. */
+                        (void) setresgid(GID_NOBODY, GID_NOBODY, GID_NOBODY);
+                        (void) setresuid(UID_NOBODY, UID_NOBODY, UID_NOBODY);
+
+                        assert_se(pid_getrlimit(getppid(), resource, &indirect) >= 0);
+
+#if HAVE_VALGRIND_VALGRIND_H
+                        /* Valgrind fakes some changes in RLIMIT_NOFILE getrlimit() returns, work around that */
+                        if (RUNNING_ON_VALGRIND && resource == RLIMIT_NOFILE) {
+                                log_info("Skipping pid_getrlimit() check for RLIMIT_NOFILE, running in valgrind");
+                                _exit(EXIT_SUCCESS);
+                        }
+#endif
+                        /* 'direct' was filled in before the fork, so it holds the parent's own view of the limit */
+                        assert_se(direct.rlim_cur == indirect.rlim_cur);
+                        assert_se(direct.rlim_max == indirect.rlim_max);
+
+                        _exit(EXIT_SUCCESS);
+                }
+        }
+}
+
 DEFINE_TEST_MAIN(LOG_INFO);