From ec0c10fc9db6459d78f0b3970a0f7a34c88e6db3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 8 Nov 2024 12:14:16 +0100 Subject: [PATCH 1/3] user-classification: add new "foreign" UID range This makes the UID range configurable via build time options, but of course it really shouldn't be changed. The default range I picked is outside even of IPA's current (ridiculously large) allocation ranges, hence hopefully minimizes conflicts. --- docs/UIDS-GIDS.md | 60 ++++++++++++++++++++-------------- docs/USER_RECORD.md | 7 ++-- meson.build | 4 +++ meson_options.txt | 2 ++ src/basic/uid-classification.c | 2 +- src/basic/uid-classification.h | 12 +++++++ src/core/systemd.pc.in | 2 ++ src/dissect/dissect.c | 2 +- src/shared/group-record.c | 3 ++ src/shared/user-record.c | 4 +++ src/shared/user-record.h | 1 + src/userdb/userdbctl.c | 7 ++++ 12 files changed, 78 insertions(+), 28 deletions(-) diff --git a/docs/UIDS-GIDS.md b/docs/UIDS-GIDS.md index 09488e2a78e..35018b77a42 100644 --- a/docs/UIDS-GIDS.md +++ b/docs/UIDS-GIDS.md @@ -129,10 +129,18 @@ possible. erroneously considers UIDs signed integers, and hence can't deal with values above 2^31. The `systemd-machined.service` service will synthesize user database records for all UIDs assigned to a running container from this range. -Note for both allocation ranges: when a UID allocation takes place NSS is -checked for collisions first, and a different UID is picked if an entry is found. -Thus, the user database is used as synchronization mechanism to ensure -exclusive ownership of UIDs and UID ranges. +4. 2147352576…2147418111 → UID range used for foreign OS images. For various + use cases (primarily: containers) it makes sense to make foreign OS images + available locally whose UID/GID ownerships do not make sense in the local + context but only within the OS image itself. 
This 64K UID range can be used + to have a clearly defined ownership even on the host, that can be mapped via + idmapped mount to a dynamic runtime UID range as needed. (These numbers in + hexadecimal are 0x7FFE0000…0x7FFEFFFF.) + +Note for the `DynamicUser=` and the `systemd-nspawn` allocation ranges: when a +UID allocation takes place NSS is checked for collisions first, and a different +UID is picked if an entry is found. Thus, the user database is used as +synchronization mechanism to ensure exclusive ownership of UIDs and UID ranges. To ensure compatibility with other subsystems allocating from the same ranges it is hence essential that they ensure that whatever they pick shows up in the user/group databases, either by providing an NSS module, or by adding entries directly to `/etc/passwd` and `/etc/group`. @@ -157,6 +165,8 @@ $ pkg-config --variable=container_uid_base_min systemd 524288 $ pkg-config --variable=container_uid_base_max systemd 1878982656 +$ pkg-config --variable=foreign_uid_base systemd +2147352576 ``` (Note that the latter encodes the maximum UID *base* `systemd-nspawn` might @@ -164,7 +174,7 @@ pick — given that 64K UIDs are assigned to each container according to this allocation logic, the maximum UID used for this range is hence 1878982656+65535=1879048191.) -Systemd has compile-time default for these boundaries. +systemd has compile-time defaults for these boundaries. Using those defaults is recommended. It will nevertheless query `/etc/login.defs` at runtime, when compiled with `-Dcompat-mutable-uid-boundaries=true` and that file is present. Support for this is considered only a compatibility feature and should not be @@ -244,25 +254,27 @@ i.e. somewhere below `/var/` or similar. 
## Summary -| UID/GID | Purpose | Defined By | Listed in | -|-----------------------|-----------------------|---------------|-------------------------------| -| 0 | `root` user | Linux | `/etc/passwd` + `nss-systemd` | -| 1…4 | System users | Distributions | `/etc/passwd` | -| 5 | `tty` group | `systemd` | `/etc/passwd` | -| 6…999 | System users | Distributions | `/etc/passwd` | -| 1000…60000 | Regular users | Distributions | `/etc/passwd` + LDAP/NIS/… | -| 60001…60513 | Human users (homed) | `systemd` | `nss-systemd` | -| 60514…60577 | Host users mapped into containers | `systemd` | `systemd-nspawn` | -| 60578…61183 | Unused | | | -| 61184…65519 | Dynamic service users | `systemd` | `nss-systemd` | -| 65520…65533 | Unused | | | -| 65534 | `nobody` user | Linux | `/etc/passwd` + `nss-systemd` | -| 65535 | 16-bit `(uid_t) -1` | Linux | | -| 65536…524287 | Unused | | | -| 524288…1879048191 | Container UID ranges | `systemd` | `nss-systemd` | -| 1879048192…2147483647 | Unused | | | -| 2147483648…4294967294 | HIC SVNT LEONES | | | -| 4294967295 | 32-bit `(uid_t) -1` | Linux | | +| UID/GID | Same in Hexadecimal | How Many | Purpose | Defined By | Listed in | +|----------------------:|----------------------:|-----------:|:----------------------------------|:--------------|:------------------------------| +| 0 | 0x00000000 | 1 | `root` user | Linux | `/etc/passwd` + `nss-systemd` | +| 1…4 | 0x00000001…0x00000004 | 4 | System users | Distributions | `/etc/passwd` | +| 5 | 0x00000005 | 1 | `tty` group | `systemd` | `/etc/passwd` | +| 6…999 | 0x00000006…0x000003E7 | 994 | System users | Distributions | `/etc/passwd` | +| 1000…60000 | 0x000003E8…0x0000EA60 | 59001 | Regular users | Distributions | `/etc/passwd` + LDAP/NIS/… | +| 60001…60513 | 0x0000EA61…0x0000EC61 | 513 | Human users (homed) | `systemd` | `nss-systemd` | +| 60514…60577 | 0x0000EC62…0x0000ECA1 | 64 | Host users mapped into containers | `systemd` | `systemd-nspawn` | +| 60578…61183 | 0x0000ECA2…0x0000EEFF | 606 
| *unused* | | | +| 61184…65519 | 0x0000EF00…0x0000FFEF | 4336 | Dynamic service users | `systemd` | `nss-systemd` | +| 65520…65533 | 0x0000FFF0…0x0000FFFD | 14 | *unused* | | | +| 65534 | 0x0000FFFE | 1 | `nobody` user | Linux | `/etc/passwd` + `nss-systemd` | +| 65535 | 0x0000FFFF | 1 | 16-bit `(uid_t) -1` | Linux | | +| 65536…524287 | 0x00010000…0x0007FFFF | 458752 | *unused* | | | +| 524288…1879048191 | 0x00080000…0x6FFFFFFF | 1878523904 | Container UID ranges | `systemd` | `nss-systemd` | +| 1879048192…2147352575 | 0x70000000…0x7FFDFFFF | 268304384 | *unused* | | | +| 2147352576…2147418111 | 0x7FFE0000…0x7FFEFFFF | 65536 | Foreign UID range | `systemd` | `nss-systemd` | +| 2147418112…2147483647 | 0x7FFF0000…0x7FFFFFFF | 65536 | *unused* | | | +| 2147483648…4294967294 | 0x80000000…0xFFFFFFFE | 2147483647 | *HIC SVNT LEONES* | | | +| 4294967295 | 0xFFFFFFFF | 1 | 32-bit `(uid_t) -1` | Linux | | Note that "Unused" in the table above doesn't mean that these ranges are really unused. It just means that these ranges have no well-established diff --git a/docs/USER_RECORD.md b/docs/USER_RECORD.md index 911fceb03f5..1a219fed417 100644 --- a/docs/USER_RECORD.md +++ b/docs/USER_RECORD.md @@ -259,14 +259,17 @@ It's probably wise to use a location string processable by geo-location subsyste Example: `Berlin, Germany` or `Basement, Room 3a`. `disposition` → A string, one of `intrinsic`, `system`, `dynamic`, `regular`, -`container`, `reserved`. If specified clarifies the disposition of the user, +`container`, `foreign`, `reserved`. If specified clarifies the disposition of the user, i.e. the context it is defined in. For regular, "human" users this should be `regular`, for system users (i.e. users that system services run under, and similar) this should be `system`. The `intrinsic` disposition should be used only for the two users that have special meaning to the OS kernel itself, i.e. the `root` and `nobody` users. 
The `container` string should be used for users that are used by an OS container, and hence will show up in `ps` listings and such, but are only defined in container context. -Finally `reserved` should be used for any users outside of these use-cases. +The `foreign` string should be used for users from UID ranges which are used +for OS images from foreign systems, i.e. where local resolution would not make +sense. +Finally, `reserved` should be used for any users outside of these use-cases. Note that this property is entirely optional and applications are assumed to be able to derive the disposition of a user automatically from a record even in absence of this field, based on other fields, for example the numeric UID. By setting this diff --git a/meson.build b/meson.build index af57c0969b4..6a7e76371ae 100644 --- a/meson.build +++ b/meson.build @@ -877,6 +877,9 @@ container_uid_base_max = get_option('container-uid-base-max') conf.set('CONTAINER_UID_BASE_MIN', container_uid_base_min) conf.set('CONTAINER_UID_BASE_MAX', container_uid_base_max) +foreign_uid_base = get_option('foreign-uid-base') +conf.set('FOREIGN_UID_BASE', foreign_uid_base) + nobody_user = get_option('nobody-user') nobody_group = get_option('nobody-group') @@ -2985,6 +2988,7 @@ summary({ conf.get('SYSTEM_ALLOC_GID_MIN')), 'dynamic UIDs' : '@0@…@1@'.format(dynamic_uid_min, dynamic_uid_max), 'container UID bases' : '@0@…@1@'.format(container_uid_base_min, container_uid_base_max), + 'foreign UID base' : '@0@'.format(foreign_uid_base), 'static UID/GID allocations' : ' '.join(static_ugids), '/dev/kvm access mode' : get_option('dev-kvm-mode'), 'render group access mode' : get_option('group-render-mode'), diff --git a/meson_options.txt b/meson_options.txt index d9242d3b30a..edf8053e514 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -273,6 +273,8 @@ option('container-uid-base-min', type : 'integer', value : 0x00080000, description : 'minimum container UID base') option('container-uid-base-max', 
type : 'integer', value : 0x6FFF0000, description : 'maximum container UID base') +option('foreign-uid-base', type : 'integer', value : 0x7FFE0000, + description : 'foreign OS image UID base') option('adm-group', type : 'boolean', description : 'the ACL for adm group should be added') option('wheel-group', type : 'boolean', diff --git a/src/basic/uid-classification.c b/src/basic/uid-classification.c index e2d2cebc6de..88be896ff4b 100644 --- a/src/basic/uid-classification.c +++ b/src/basic/uid-classification.c @@ -127,5 +127,5 @@ bool uid_for_system_journal(uid_t uid) { /* Returns true if the specified UID shall get its data stored in the system journal. */ - return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY || uid_is_container(uid); + return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY || uid_is_container(uid) || uid_is_foreign(uid); } diff --git a/src/basic/uid-classification.h b/src/basic/uid-classification.h index 0932123d5cc..2d76be5f04c 100644 --- a/src/basic/uid-classification.h +++ b/src/basic/uid-classification.h @@ -12,6 +12,10 @@ assert_cc((CONTAINER_UID_BASE_MAX & 0xFFFFU) == 0); #define CONTAINER_UID_MIN (CONTAINER_UID_BASE_MIN) #define CONTAINER_UID_MAX (CONTAINER_UID_BASE_MAX + 0xFFFFU) +assert_cc((FOREIGN_UID_BASE & 0xFFFFU) == 0); +#define FOREIGN_UID_MIN (FOREIGN_UID_BASE) +#define FOREIGN_UID_MAX (FOREIGN_UID_BASE + 0xFFFFU) + bool uid_is_system(uid_t uid); bool gid_is_system(gid_t gid); @@ -31,6 +35,14 @@ static inline bool gid_is_container(gid_t gid) { return uid_is_container((uid_t) gid); } +static inline bool uid_is_foreign(uid_t uid) { + return FOREIGN_UID_MIN <= uid && uid <= FOREIGN_UID_MAX; +} + +static inline bool gid_is_foreign(gid_t gid) { + return uid_is_foreign((uid_t) gid); +} + typedef struct UGIDAllocationRange { uid_t system_alloc_uid_min; uid_t system_uid_max; diff --git a/src/core/systemd.pc.in b/src/core/systemd.pc.in index f3b85b01909..8d044dd7ad1 100644 --- a/src/core/systemd.pc.in +++ 
b/src/core/systemd.pc.in @@ -102,6 +102,8 @@ containeruidbasemin=${container_uid_base_min} container_uid_base_max={{CONTAINER_UID_BASE_MAX}} containeruidbasemax=${container_uid_base_max} +foreign_uid_base={{FOREIGN_UID_BASE}} + Name: systemd Description: systemd System and Service Manager URL: {{PROJECT_URL}} diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 91f0f1de136..cb51d713843 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -1199,7 +1199,7 @@ static const char *pick_color_for_uid_gid(uid_t uid) { return ansi_normal(); /* files in disk images are typically owned by root and other system users, no issue there */ if (uid_is_dynamic(uid)) return ansi_highlight_red(); /* files should never be owned persistently by dynamic users, and there are just no excuses */ - if (uid_is_container(uid)) + if (uid_is_container(uid) || uid_is_foreign(uid)) return ansi_highlight_cyan(); return ansi_highlight(); diff --git a/src/shared/group-record.c b/src/shared/group-record.c index 4898616252c..eea60af3346 100644 --- a/src/shared/group-record.c +++ b/src/shared/group-record.c @@ -303,6 +303,9 @@ UserDisposition group_record_disposition(GroupRecord *h) { if (gid_is_container(h->gid)) return USER_CONTAINER; + if (gid_is_foreign(h->gid)) + return USER_FOREIGN; + if (h->gid > INT32_MAX) return USER_RESERVED; diff --git a/src/shared/user-record.c b/src/shared/user-record.c index 131949a4e4f..88970425cc6 100644 --- a/src/shared/user-record.c +++ b/src/shared/user-record.c @@ -1993,6 +1993,9 @@ UserDisposition user_record_disposition(UserRecord *h) { if (uid_is_container(h->uid)) return USER_CONTAINER; + if (uid_is_foreign(h->uid)) + return USER_FOREIGN; + if (h->uid > INT32_MAX) return USER_RESERVED; @@ -2712,6 +2715,7 @@ static const char* const user_disposition_table[_USER_DISPOSITION_MAX] = { [USER_DYNAMIC] = "dynamic", [USER_REGULAR] = "regular", [USER_CONTAINER] = "container", + [USER_FOREIGN] = "foreign", [USER_RESERVED] = "reserved", }; diff 
--git a/src/shared/user-record.h b/src/shared/user-record.h index b539b3f55e3..d3decdb5c1f 100644 --- a/src/shared/user-record.h +++ b/src/shared/user-record.h @@ -17,6 +17,7 @@ typedef enum UserDisposition { USER_DYNAMIC, /* dynamically allocated users for system services */ USER_REGULAR, /* regular (typically human users) */ USER_CONTAINER, /* UID ranges allocated for container uses */ + USER_FOREIGN, /* UID range allocated for foreign OS images */ USER_RESERVED, /* Range above 2^31 */ _USER_DISPOSITION_MAX, _USER_DISPOSITION_INVALID = -EINVAL, diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c index a803df8b0b5..5a0359dccf1 100644 --- a/src/userdb/userdbctl.c +++ b/src/userdb/userdbctl.c @@ -61,6 +61,7 @@ static const char *user_disposition_to_color(UserDisposition d) { return ansi_green(); case USER_CONTAINER: + case USER_FOREIGN: return ansi_cyan(); case USER_RESERVED: @@ -170,6 +171,12 @@ static const struct { .name = "container", .disposition = USER_CONTAINER, }, + { + .first = FOREIGN_UID_MIN, + .last = FOREIGN_UID_MAX, + .name = "foreign", + .disposition = USER_FOREIGN, + }, #if ENABLE_HOMED { .first = HOME_UID_MIN, From 44eb6b81db77216e4f34e1de37eda133d7db6945 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 12 Nov 2024 17:04:11 +0100 Subject: [PATCH 2/3] userdb: synthesize stub user records for the foreign UID --- man/userdbctl.xml | 6 +- src/nspawn/nspawn-bind-user.c | 4 +- src/nss-systemd/nss-systemd.c | 12 ++-- src/shared/userdb.c | 127 +++++++++++++++++++++++++++++++--- src/shared/userdb.h | 19 ++--- src/userdb/userdbctl.c | 4 +- 6 files changed, 142 insertions(+), 30 deletions(-) diff --git a/man/userdbctl.xml b/man/userdbctl.xml index 56d0068e735..f7b0c1d9ebb 100644 --- a/man/userdbctl.xml +++ b/man/userdbctl.xml @@ -136,9 +136,9 @@ Controls whether to synthesize records for the root and nobody users/groups if they - are not defined otherwise. 
By default (or with yes), such records are implicitly - synthesized if otherwise missing since they have special significance to the OS. When - no, this synthesizing is turned off. + are not defined otherwise, as well as the users/groups for the "foreign" UID range. By default (or with + yes), such records are implicitly synthesized if otherwise missing since they have + special significance to the OS. When no, this synthesizing is turned off. diff --git a/src/nspawn/nspawn-bind-user.c b/src/nspawn/nspawn-bind-user.c index d64a89f161c..749accdce8e 100644 --- a/src/nspawn/nspawn-bind-user.c +++ b/src/nspawn/nspawn-bind-user.c @@ -231,7 +231,7 @@ int bind_user_prepare( _cleanup_(group_record_unrefp) GroupRecord *g = NULL, *cg = NULL; _cleanup_free_ char *sm = NULL, *sd = NULL; - r = userdb_by_name(*n, USERDB_DONT_SYNTHESIZE, &u); + r = userdb_by_name(*n, USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, &u); if (r < 0) return log_error_errno(r, "Failed to resolve user '%s': %m", *n); @@ -252,7 +252,7 @@ int bind_user_prepare( if (u->uid >= uid_shift && u->uid < uid_shift + uid_range) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID of user '%s' to map is already in container UID range, refusing.", u->user_name); - r = groupdb_by_gid(u->gid, USERDB_DONT_SYNTHESIZE, &g); + r = groupdb_by_gid(u->gid, USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, &g); if (r < 0) return log_error_errno(r, "Failed to resolve group of user '%s': %m", u->user_name); diff --git a/src/nss-systemd/nss-systemd.c b/src/nss-systemd/nss-systemd.c index 8e8d4cf1cb6..d686d920fc9 100644 --- a/src/nss-systemd/nss-systemd.c +++ b/src/nss-systemd/nss-systemd.c @@ -615,7 +615,7 @@ enum nss_status _nss_systemd_setpwent(int stayopen) { * (think: LDAP/NIS type situations), and our synthesizing of root/nobody is a robustness fallback * only, which matters for getpwnam()/getpwuid() primarily, which are the main NSS entrypoints to the * user database. 
*/ - r = userdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE, &getpwent_data.iterator); + r = userdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE_INTRINSIC | USERDB_DONT_SYNTHESIZE_FOREIGN, &getpwent_data.iterator); return r < 0 ? NSS_STATUS_UNAVAIL : NSS_STATUS_SUCCESS; } @@ -634,8 +634,8 @@ enum nss_status _nss_systemd_setgrent(int stayopen) { getgrent_data.iterator = userdb_iterator_free(getgrent_data.iterator); getgrent_data.by_membership = false; - /* See _nss_systemd_setpwent() for an explanation why we use USERDB_DONT_SYNTHESIZE here */ - r = groupdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE, &getgrent_data.iterator); + /* See _nss_systemd_setpwent() for an explanation why we use USERDB_DONT_SYNTHESIZE_INTRINSIC here */ + r = groupdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE_INTRINSIC | USERDB_DONT_SYNTHESIZE_FOREIGN, &getgrent_data.iterator); return r < 0 ? NSS_STATUS_UNAVAIL : NSS_STATUS_SUCCESS; } @@ -654,8 +654,8 @@ enum nss_status _nss_systemd_setspent(int stayopen) { getspent_data.iterator = userdb_iterator_free(getspent_data.iterator); getspent_data.by_membership = false; - /* See _nss_systemd_setpwent() for an explanation why we use USERDB_DONT_SYNTHESIZE here */ - r = userdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE, &getspent_data.iterator); + /* See _nss_systemd_setpwent() for an explanation why we use USERDB_DONT_SYNTHESIZE_INTRINSIC here */ + r = userdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE_INTRINSIC | USERDB_DONT_SYNTHESIZE_FOREIGN, &getspent_data.iterator); return r < 0 ? 
NSS_STATUS_UNAVAIL : NSS_STATUS_SUCCESS; } @@ -675,7 +675,7 @@ enum nss_status _nss_systemd_setsgent(int stayopen) { getsgent_data.by_membership = false; /* See _nss_systemd_setpwent() for an explanation why we use USERDB_DONT_SYNTHESIZE here */ - r = groupdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE, &getsgent_data.iterator); + r = groupdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE_INTRINSIC | USERDB_DONT_SYNTHESIZE_FOREIGN, &getsgent_data.iterator); return r < 0 ? NSS_STATUS_UNAVAIL : NSS_STATUS_SUCCESS; } diff --git a/src/shared/userdb.c b/src/shared/userdb.c index ff83d4bf902..c7334e820d5 100644 --- a/src/shared/userdb.c +++ b/src/shared/userdb.c @@ -16,10 +16,11 @@ #include "set.h" #include "socket-util.h" #include "strv.h" +#include "uid-classification.h" #include "user-record-nss.h" #include "user-util.h" -#include "userdb-dropin.h" #include "userdb.h" +#include "userdb-dropin.h" DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(link_hash_ops, void, trivial_hash_func, trivial_compare_func, sd_varlink, sd_varlink_unref); @@ -116,8 +117,8 @@ static UserDBIterator* userdb_iterator_new(LookupWhat what, UserDBFlags flags) { *i = (UserDBIterator) { .what = what, .flags = flags, - .synthesize_root = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE), - .synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE), + .synthesize_root = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_INTRINSIC), + .synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_INTRINSIC), }; return i; @@ -434,7 +435,7 @@ static int userdb_start_query( } /* First, let's talk to the multiplexer, if we can */ - if ((flags & (USERDB_AVOID_MULTIPLEXER|USERDB_EXCLUDE_DYNAMIC_USER|USERDB_EXCLUDE_NSS|USERDB_EXCLUDE_DROPIN|USERDB_DONT_SYNTHESIZE)) == 0 && + if ((flags & (USERDB_AVOID_MULTIPLEXER|USERDB_EXCLUDE_DYNAMIC_USER|USERDB_EXCLUDE_NSS|USERDB_EXCLUDE_DROPIN|USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN)) == 0 && !strv_contains(except, "io.systemd.Multiplexer") && 
(!only || strv_contains(only, "io.systemd.Multiplexer"))) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *patched_query = sd_json_variant_ref(query); @@ -617,6 +618,63 @@ static int synthetic_nobody_user_build(UserRecord **ret) { SD_JSON_BUILD_PAIR("disposition", JSON_BUILD_CONST_STRING("intrinsic")))); } +static int synthetic_foreign_user_build(uid_t foreign_uid, UserRecord **ret) { + assert(ret); + + if (!uid_is_valid(foreign_uid)) + return -ESRCH; + if (foreign_uid > 0xFFFF) + return -ESRCH; + + _cleanup_free_ char *un = NULL; + if (asprintf(&un, "foreign-" UID_FMT, foreign_uid) < 0) + return -ENOMEM; + + _cleanup_free_ char *rn = NULL; + if (asprintf(&rn, "Foreign System Image UID " UID_FMT, foreign_uid) < 0) + return -ENOMEM; + + return user_record_build( + ret, + SD_JSON_BUILD_OBJECT( + SD_JSON_BUILD_PAIR("userName", SD_JSON_BUILD_STRING(un)), + SD_JSON_BUILD_PAIR("realName", SD_JSON_BUILD_STRING(rn)), + SD_JSON_BUILD_PAIR("uid", SD_JSON_BUILD_UNSIGNED(FOREIGN_UID_BASE + foreign_uid)), + SD_JSON_BUILD_PAIR("gid", SD_JSON_BUILD_UNSIGNED(FOREIGN_UID_BASE + foreign_uid)), + SD_JSON_BUILD_PAIR("shell", JSON_BUILD_CONST_STRING(NOLOGIN)), + SD_JSON_BUILD_PAIR("locked", SD_JSON_BUILD_BOOLEAN(true)), + SD_JSON_BUILD_PAIR("disposition", JSON_BUILD_CONST_STRING("foreign")))); +} + +static int user_name_foreign_extract_uid(const char *name, uid_t *ret_uid) { + int r; + + assert(name); + assert(ret_uid); + + /* Parses the inner UID from a user name of the foreign UID range, in the form "foreign-NNN". Returns + * > 0 if that worked, 0 if it didn't. 
*/ + + const char *e = startswith(name, "foreign-"); + if (!e) + goto nomatch; + + uid_t uid; + r = parse_uid(e, &uid); + if (r < 0) + goto nomatch; + + if (uid > 0xFFFF) + goto nomatch; + + *ret_uid = uid; + return 1; + +nomatch: + *ret_uid = UID_INVALID; + return 0; +} + int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret) { _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; _cleanup_(sd_json_variant_unrefp) sd_json_variant *query = NULL; @@ -658,7 +716,7 @@ int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret) { } } - if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_INTRINSIC)) { if (streq(name, "root")) return synthetic_root_user_build(ret); @@ -666,6 +724,16 @@ int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret) { return synthetic_nobody_user_build(ret); } + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_FOREIGN)) { + uid_t foreign_uid; + r = user_name_foreign_extract_uid(name, &foreign_uid); + if (r < 0) + return r; + if (r > 0) + return synthetic_foreign_user_build(foreign_uid, ret); + r = -ESRCH; + } + return r; } @@ -708,7 +776,7 @@ int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret) { } } - if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_INTRINSIC)) { if (uid == 0) return synthetic_root_user_build(ret); @@ -716,6 +784,9 @@ int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret) { return synthetic_nobody_user_build(ret); } + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_FOREIGN) && uid_is_foreign(uid)) + return synthetic_foreign_user_build(uid - FOREIGN_UID_BASE, ret); + return r; } @@ -751,6 +822,8 @@ int userdb_all(UserDBFlags flags, UserDBIterator **ret) { log_debug_errno(r, "Failed to find user drop-ins, ignoring: %m"); } + /* Note that we do not enumerate the foreign users, since those would be just 64K of noise */ + /* propagate IPC error, but only if there 
are no drop-ins */ if (qr < 0 && !iterator->nss_iterating && @@ -888,6 +961,31 @@ static int synthetic_nobody_group_build(GroupRecord **ret) { SD_JSON_BUILD_PAIR("disposition", JSON_BUILD_CONST_STRING("intrinsic")))); } +static int synthetic_foreign_group_build(gid_t foreign_gid, GroupRecord **ret) { + assert(ret); + + if (!gid_is_valid(foreign_gid)) + return -ESRCH; + if (foreign_gid > 0xFFFF) + return -ESRCH; + + _cleanup_free_ char *gn = NULL; + if (asprintf(&gn, "foreign-" GID_FMT, foreign_gid) < 0) + return -ENOMEM; + + _cleanup_free_ char *d = NULL; + if (asprintf(&d, "Foreign System Image GID " GID_FMT, foreign_gid) < 0) + return -ENOMEM; + + return group_record_build( + ret, + SD_JSON_BUILD_OBJECT( + SD_JSON_BUILD_PAIR("groupName", SD_JSON_BUILD_STRING(gn)), + SD_JSON_BUILD_PAIR("description", SD_JSON_BUILD_STRING(d)), + SD_JSON_BUILD_PAIR("gid", SD_JSON_BUILD_UNSIGNED(FOREIGN_UID_BASE + foreign_gid)), + SD_JSON_BUILD_PAIR("disposition", JSON_BUILD_CONST_STRING("foreign")))); +} + int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret) { _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; _cleanup_(sd_json_variant_unrefp) sd_json_variant *query = NULL; @@ -926,7 +1024,7 @@ int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret) { } } - if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_INTRINSIC)) { if (streq(name, "root")) return synthetic_root_group_build(ret); @@ -934,6 +1032,16 @@ int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret) { return synthetic_nobody_group_build(ret); } + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_FOREIGN)) { + uid_t foreign_gid; + r = user_name_foreign_extract_uid(name, &foreign_gid); /* Same for UID + GID */ + if (r < 0) + return r; + if (r > 0) + return synthetic_foreign_group_build(foreign_gid, ret); + r = -ESRCH; + } + return r; } @@ -975,7 +1083,7 @@ int groupdb_by_gid(gid_t gid, UserDBFlags flags, 
GroupRecord **ret) { } } - if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_INTRINSIC)) { if (gid == 0) return synthetic_root_group_build(ret); @@ -983,6 +1091,9 @@ int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret) { return synthetic_nobody_group_build(ret); } + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE_FOREIGN) && gid_is_foreign(gid)) + return synthetic_foreign_group_build(gid - FOREIGN_UID_BASE, ret); + return r; } diff --git a/src/shared/userdb.h b/src/shared/userdb.h index 75eb4b2dce8..daf87fb5cf5 100644 --- a/src/shared/userdb.h +++ b/src/shared/userdb.h @@ -16,19 +16,20 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(UserDBIterator*, userdb_iterator_free); typedef enum UserDBFlags { /* The main sources */ - USERDB_EXCLUDE_NSS = 1 << 0, /* don't do client-side nor server-side NSS */ - USERDB_EXCLUDE_VARLINK = 1 << 1, /* don't talk to any varlink services */ - USERDB_EXCLUDE_DROPIN = 1 << 2, /* don't load drop-in user/group definitions */ + USERDB_EXCLUDE_NSS = 1 << 0, /* don't do client-side nor server-side NSS */ + USERDB_EXCLUDE_VARLINK = 1 << 1, /* don't talk to any varlink services */ + USERDB_EXCLUDE_DROPIN = 1 << 2, /* don't load drop-in user/group definitions */ /* Modifications */ - USERDB_SUPPRESS_SHADOW = 1 << 3, /* don't do client-side shadow calls (server side might happen though) */ - USERDB_EXCLUDE_DYNAMIC_USER = 1 << 4, /* exclude looking up in io.systemd.DynamicUser */ - USERDB_AVOID_MULTIPLEXER = 1 << 5, /* exclude looking up via io.systemd.Multiplexer */ - USERDB_DONT_SYNTHESIZE = 1 << 6, /* don't synthesize root/nobody */ + USERDB_SUPPRESS_SHADOW = 1 << 3, /* don't do client-side shadow calls (server side might happen though) */ + USERDB_EXCLUDE_DYNAMIC_USER = 1 << 4, /* exclude looking up in io.systemd.DynamicUser */ + USERDB_AVOID_MULTIPLEXER = 1 << 5, /* exclude looking up via io.systemd.Multiplexer */ + USERDB_DONT_SYNTHESIZE_INTRINSIC = 1 << 6, /* don't synthesize root/nobody */ + 
USERDB_DONT_SYNTHESIZE_FOREIGN = 1 << 7, /* don't synthesize foreign UID records */ /* Combinations */ - USERDB_NSS_ONLY = USERDB_EXCLUDE_VARLINK|USERDB_EXCLUDE_DROPIN|USERDB_DONT_SYNTHESIZE, - USERDB_DROPIN_ONLY = USERDB_EXCLUDE_NSS|USERDB_EXCLUDE_VARLINK|USERDB_DONT_SYNTHESIZE, + USERDB_NSS_ONLY = USERDB_EXCLUDE_VARLINK|USERDB_EXCLUDE_DROPIN|USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, + USERDB_DROPIN_ONLY = USERDB_EXCLUDE_NSS|USERDB_EXCLUDE_VARLINK|USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, } UserDBFlags; /* Well-known errors we'll return here: diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c index 5a0359dccf1..ee6e6c869a0 100644 --- a/src/userdb/userdbctl.c +++ b/src/userdb/userdbctl.c @@ -1337,7 +1337,7 @@ static int parse_argv(int argc, char *argv[]) { break; case 'N': - arg_userdb_flags |= USERDB_EXCLUDE_NSS|USERDB_DONT_SYNTHESIZE; + arg_userdb_flags |= USERDB_EXCLUDE_NSS|USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN; break; case ARG_WITH_NSS: @@ -1369,7 +1369,7 @@ static int parse_argv(int argc, char *argv[]) { if (r < 0) return r; - SET_FLAG(arg_userdb_flags, USERDB_DONT_SYNTHESIZE, !r); + SET_FLAG(arg_userdb_flags, USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, !r); break; case ARG_MULTIPLEXER: From 55e4946f9ca75c35e87ff7f0c0d871e0d80e8ca0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 12 Nov 2024 09:44:48 +0100 Subject: [PATCH 3/3] dissect: add new --shift command --- man/systemd-dissect.xml | 24 ++++++++++++++++++++ src/dissect/dissect.c | 50 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/man/systemd-dissect.xml b/man/systemd-dissect.xml index 3aaa1744f3e..2718feccb75 100644 --- a/man/systemd-dissect.xml +++ b/man/systemd-dissect.xml @@ -62,6 +62,9 @@ systemd-dissect OPTIONS --validate IMAGE + + systemd-dissect OPTIONS --shift IMAGE UIDBASE + @@ -350,6 +353,27 @@ + + + + Recursively iterates 
through all inodes of the specified image and shifts the UIDs + and GIDs the inodes are owned by into the specified UID range. Takes an image path and a UID base as + parameter. The UID base can be specified numerically (in which case it must be a multiple of 65536, + and either 0 or within the container or foreign UID range, as per Users, Groups, UIDs and GIDs on systemd Systems), or as + the symbolic identifier foreign which is shorthand to the foreign UID base. This + command is useful for preparing directory container images for unprivileged use. Note that this + command is intended for images that use the 16bit UIDs/GIDs range only, and it always ignores the + upper 16bit of the current UID/GID ownership, combining the lower 16 bit with the target UID + base. + + Use systemd-dissect --shift /some/container/tree foreign to shift a + container image into the foreign UID range, or systemd-dissect --shift /some/container/tree + 0 to shift it to host UID range. + + + + diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index cb51d713843..3ca1e17be4b 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -45,6 +45,7 @@ #include "process-util.h" #include "recurse-dir.h" #include "sha256.h" +#include "shift-uid.h" #include "stat-util.h" #include "string-util.h" #include "strv.h" @@ -68,6 +69,7 @@ static enum { ACTION_DISCOVER, ACTION_VALIDATE, ACTION_MAKE_ARCHIVE, + ACTION_SHIFT, } arg_action = ACTION_DISSECT; static char *arg_image = NULL; static char *arg_root = NULL; @@ -97,6 +99,7 @@ static bool arg_mtree_hash = true; static bool arg_via_service = false; static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID; static bool arg_all = false; +static uid_t arg_uid_base = UID_INVALID; STATIC_DESTRUCTOR_REGISTER(arg_image, freep); STATIC_DESTRUCTOR_REGISTER(arg_root, freep); @@ -129,6 +132,7 @@ static int help(void) { "%1$s [OPTIONS...] --make-archive IMAGE [TARGET]\n" "%1$s [OPTIONS...] --discover\n" "%1$s [OPTIONS...] 
--validate IMAGE\n" + "%1$s [OPTIONS...] --shift IMAGE UIDBASE\n" "\n%5$sDissect a Discoverable Disk Image (DDI).%6$s\n\n" "%3$sOptions:%4$s\n" " --no-pager Do not pipe output into a pager\n" @@ -174,6 +178,7 @@ static int help(void) { " --make-archive Convert the DDI to an archive file\n" " --discover Discover DDIs in well known directories\n" " --validate Validate image and image policy\n" + " --shift Shift UID range to selected base\n" "\nSee the %2$s for details.\n", program_invocation_short_name, link, @@ -279,6 +284,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_VALIDATE, ARG_MTREE_HASH, ARG_MAKE_ARCHIVE, + ARG_SHIFT, ARG_SYSTEM, ARG_USER, ARG_ALL, @@ -315,6 +321,7 @@ static int parse_argv(int argc, char *argv[]) { { "validate", no_argument, NULL, ARG_VALIDATE }, { "mtree-hash", required_argument, NULL, ARG_MTREE_HASH }, { "make-archive", no_argument, NULL, ARG_MAKE_ARCHIVE }, + { "shift", no_argument, NULL, ARG_SHIFT }, { "system", no_argument, NULL, ARG_SYSTEM }, { "user", no_argument, NULL, ARG_USER }, { "all", no_argument, NULL, ARG_ALL }, @@ -550,6 +557,10 @@ static int parse_argv(int argc, char *argv[]) { arg_action = ACTION_MAKE_ARCHIVE; break; + case ARG_SHIFT: + arg_action = ACTION_SHIFT; + break; + case ARG_SYSTEM: system_scope_requested = true; break; @@ -731,6 +742,33 @@ static int parse_argv(int argc, char *argv[]) { arg_flags &= ~(DISSECT_IMAGE_PIN_PARTITION_DEVICES|DISSECT_IMAGE_ADD_PARTITION_DEVICES); break; + case ACTION_SHIFT: + if (optind + 2 != argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected an image path and a UID base as only argument."); + + r = parse_image_path_argument(argv[optind], &arg_root, &arg_image); + if (r < 0) + return r; + + if (streq(argv[optind + 1], "foreign")) + arg_uid_base = FOREIGN_UID_BASE; + else { + r = parse_uid(argv[optind + 1], &arg_uid_base); + if (r < 0) + return log_error_errno(r, "Failed to parse UID base: %s", argv[optind + 1]); + + if ((arg_uid_base & 0xFFFF) != 0) + return 
log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected UID base not a multiple of 64K: " UID_FMT, arg_uid_base); + if (arg_uid_base != 0 && + !uid_is_container(arg_uid_base) && + !uid_is_foreign(arg_uid_base)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected UID range is not in the container range, nor the foreign one, refusing."); + } + + arg_flags |= DISSECT_IMAGE_REQUIRE_ROOT; + break; + default: assert_not_reached(); } @@ -1444,7 +1482,7 @@ static int action_list_or_mtree_or_copy_or_make_archive(DissectedImage *m, LoopD const char *root; int r; - assert(IN_SET(arg_action, ACTION_LIST, ACTION_MTREE, ACTION_COPY_FROM, ACTION_COPY_TO, ACTION_MAKE_ARCHIVE)); + assert(IN_SET(arg_action, ACTION_LIST, ACTION_MTREE, ACTION_COPY_FROM, ACTION_COPY_TO, ACTION_MAKE_ARCHIVE, ACTION_SHIFT)); if (arg_image) { assert(m); @@ -1699,6 +1737,13 @@ static int action_list_or_mtree_or_copy_or_make_archive(DissectedImage *m, LoopD #endif } + case ACTION_SHIFT: + r = path_patch_uid(root, arg_uid_base, 0x10000); + if (r < 0) + return log_error_errno(r, "Failed to shift UID base: %m"); + + return 0; + default: assert_not_reached(); } @@ -2121,7 +2166,7 @@ static int run(int argc, char *argv[]) { else r = loop_device_make_by_path(arg_image, open_flags, /* sector_size= */ UINT32_MAX, loop_flags, LOCK_SH, &d); if (r < 0) { - if (!ERRNO_IS_PRIVILEGE(r) || !IN_SET(arg_action, ACTION_DISSECT, ACTION_LIST, ACTION_MTREE, ACTION_COPY_FROM, ACTION_COPY_TO)) + if (!ERRNO_IS_PRIVILEGE(r) || !IN_SET(arg_action, ACTION_DISSECT, ACTION_LIST, ACTION_MTREE, ACTION_COPY_FROM, ACTION_COPY_TO, ACTION_SHIFT)) return log_error_errno(r, "Failed to set up loopback device for %s: %m", arg_image); log_debug_errno(r, "Lacking permissions to set up loopback block device for %s, using service: %m", arg_image); @@ -2206,6 +2251,7 @@ static int run(int argc, char *argv[]) { case ACTION_COPY_FROM: case ACTION_COPY_TO: case ACTION_MAKE_ARCHIVE: + case ACTION_SHIFT: return 
action_list_or_mtree_or_copy_or_make_archive(m, d, userns_fd); case ACTION_WITH: