mirror of
git://git.proxmox.com/git/lxc.git
synced 2025-03-16 10:50:38 +03:00
update to master / lxc-4.0.0+fixes
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
parent
f101bb5779
commit
3e9264ba48
@ -1,23 +1,25 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From 55e20fb88be7512c80baa7514bc815e4cec4d22c Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Fri, 10 Feb 2017 09:13:40 +0100
|
||||
Subject: [PATCH] PVE: [Config] lxc.service: start after a potential
|
||||
Subject: [PATCH lxc 1/3] PVE: [Config] lxc.service: start after a potential
|
||||
syslog.service
|
||||
|
||||
We could add this as a snippet from pve-container instead.
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
config/init/systemd/lxc.service.in | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
|
||||
index cd6199671..77541917e 100644
|
||||
index e3745dc2d..fbe46f1d7 100644
|
||||
--- a/config/init/systemd/lxc.service.in
|
||||
+++ b/config/init/systemd/lxc.service.in
|
||||
@@ -1,6 +1,6 @@
|
||||
[Unit]
|
||||
Description=LXC Container Initialization and Autoboot Code
|
||||
-After=network.target lxc-net.service
|
||||
+After=syslog.service network.target lxc-net.service
|
||||
-After=network.target lxc-net.service remote-fs.target
|
||||
+After=network.target lxc-net.service remote-fs.target syslog.target
|
||||
Wants=lxc-net.service
|
||||
Documentation=man:lxc-autostart man:lxc
|
||||
|
||||
|
@ -1,7 +1,10 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From a1a53a511f065d68520337cbbba627b4164da865 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Wed, 9 Nov 2016 09:14:26 +0100
|
||||
Subject: [PATCH] PVE: [Config] deny rw mounting of /sys and /proc
|
||||
Subject: [PATCH lxc 2/3] PVE: [Config] deny rw mounting of /sys and /proc
|
||||
|
||||
Note that we don't actually make use of this anymore, since
|
||||
we switched to the generated profiles which already do this.
|
||||
|
||||
Mounting them read-write would allow root in a privileged container to change
|
||||
the permissions of /sys on the host, which could lock out
|
@ -1,31 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Fri, 10 Feb 2017 09:15:37 +0100
|
||||
Subject: [PATCH] PVE: [Down] run lxcnetaddbr when instantiating veths
|
||||
|
||||
FIXME: Why aren't we using regular up-scripts?
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/network.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/src/lxc/network.c b/src/lxc/network.c
|
||||
index 65727f6b5..cd8d0bb14 100644
|
||||
--- a/src/lxc/network.c
|
||||
+++ b/src/lxc/network.c
|
||||
@@ -503,6 +503,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
|
||||
netdev->upscript, "up", argv);
|
||||
if (err < 0)
|
||||
goto out_delete;
|
||||
+ } else if (netdev->link[0] == '\0') {
|
||||
+ err = run_script(handler->name, "net", "/usr/share/lxc/lxcnetaddbr", "up",
|
||||
+ "veth", veth1, (char*) NULL);
|
||||
+ if (err)
|
||||
+ goto out_delete;
|
||||
}
|
||||
|
||||
DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,7 +1,7 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From 08f4569b8f5fb9769ed6a76ddd1040a577d0b2fe Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Tue, 13 Aug 2019 13:57:22 +0200
|
||||
Subject: [PATCH] PVE: [Config] attach: always use getent
|
||||
Subject: [PATCH lxc 3/3] PVE: [Config] attach: always use getent
|
||||
|
||||
In debian buster, some libnss plugins (if installed) can
|
||||
cause getpwent to segfault instead of erroring out cleanly.
|
||||
@ -13,10 +13,10 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
1 file changed, 2 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/src/lxc/attach.c b/src/lxc/attach.c
|
||||
index 80c41fe26..f30f192e3 100644
|
||||
index 406b8ec74..0a82c0a5d 100644
|
||||
--- a/src/lxc/attach.c
|
||||
+++ b/src/lxc/attach.c
|
||||
@@ -1506,12 +1506,8 @@ int lxc_attach_run_command(void *payload)
|
||||
@@ -1454,12 +1454,8 @@ int lxc_attach_run_command(void *payload)
|
||||
|
||||
int lxc_attach_run_shell(void* payload)
|
||||
{
|
||||
@ -29,7 +29,7 @@ index 80c41fe26..f30f192e3 100644
|
||||
int ret;
|
||||
|
||||
/* Ignore payload parameter. */
|
||||
@@ -1519,32 +1515,13 @@ int lxc_attach_run_shell(void* payload)
|
||||
@@ -1467,32 +1463,13 @@ int lxc_attach_run_shell(void* payload)
|
||||
|
||||
uid = getuid();
|
||||
|
||||
@ -63,7 +63,7 @@ index 80c41fe26..f30f192e3 100644
|
||||
if (user_shell)
|
||||
execlp(user_shell, user_shell, (char *)NULL);
|
||||
|
||||
@@ -1554,8 +1531,7 @@ int lxc_attach_run_shell(void* payload)
|
||||
@@ -1502,8 +1479,7 @@ int lxc_attach_run_shell(void* payload)
|
||||
execlp("/bin/sh", "/bin/sh", (char *)NULL);
|
||||
|
||||
SYSERROR("Failed to execute shell");
|
@ -1,207 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Mon, 20 Nov 2017 10:49:41 +0100
|
||||
Subject: [PATCH] PVE: [Up] possibility to run lxc-monitord as a regular daemon
|
||||
|
||||
lxc-monitord instances are spawned on demand and, if this
|
||||
happens from a service, the daemon is considered part of
|
||||
it by systemd, as it is running in the same cgroups. This
|
||||
can be avoided by leaving it running permanently.
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
.gitignore | 1 +
|
||||
config/init/systemd/Makefile.am | 10 ++--
|
||||
config/init/systemd/lxc-monitord.service.in | 12 +++++
|
||||
configure.ac | 1 +
|
||||
lxc.spec.in | 1 +
|
||||
src/lxc/cmd/lxc_monitord.c | 60 +++++++++++++++------
|
||||
6 files changed, 64 insertions(+), 21 deletions(-)
|
||||
create mode 100644 config/init/systemd/lxc-monitord.service.in
|
||||
|
||||
diff --git a/.gitignore b/.gitignore
|
||||
index b2d4657c4..36d0b7013 100644
|
||||
--- a/.gitignore
|
||||
+++ b/.gitignore
|
||||
@@ -119,6 +119,7 @@ config/bash/lxc
|
||||
config/init/common/lxc-containers
|
||||
config/init/common/lxc-net
|
||||
config/init/systemd/lxc-autostart-helper
|
||||
+config/init/systemd/lxc-monitord.service
|
||||
config/init/systemd/lxc-net.service
|
||||
config/init/systemd/lxc.service
|
||||
config/init/systemd/lxc@.service
|
||||
diff --git a/config/init/systemd/Makefile.am b/config/init/systemd/Makefile.am
|
||||
index c448850d1..4a4fde5e7 100644
|
||||
--- a/config/init/systemd/Makefile.am
|
||||
+++ b/config/init/systemd/Makefile.am
|
||||
@@ -2,19 +2,21 @@ EXTRA_DIST = \
|
||||
lxc-apparmor-load \
|
||||
lxc.service.in \
|
||||
lxc@.service.in \
|
||||
- lxc-net.service.in
|
||||
+ lxc-net.service.in \
|
||||
+ lxc-monitord.service.in
|
||||
|
||||
if INIT_SCRIPT_SYSTEMD
|
||||
-BUILT_SOURCES = lxc.service lxc@.service lxc-net.service
|
||||
+BUILT_SOURCES = lxc.service lxc@.service lxc-net.service lxc-monitord.service
|
||||
|
||||
-install-systemd: lxc.service lxc@.service lxc-net.service lxc-apparmor-load
|
||||
+install-systemd: lxc.service lxc@.service lxc-net.service lxc-monitord.service lxc-apparmor-load
|
||||
$(MKDIR_P) $(DESTDIR)$(SYSTEMD_UNIT_DIR)
|
||||
- $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
|
||||
+ $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service lxc-monitord.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
|
||||
|
||||
uninstall-systemd:
|
||||
rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc.service
|
||||
rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc@.service
|
||||
rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-net.service
|
||||
+ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-monitord.service
|
||||
rmdir $(DESTDIR)$(SYSTEMD_UNIT_DIR) || :
|
||||
|
||||
pkglibexec_SCRIPTS = lxc-apparmor-load
|
||||
diff --git a/config/init/systemd/lxc-monitord.service.in b/config/init/systemd/lxc-monitord.service.in
|
||||
new file mode 100644
|
||||
index 000000000..406351688
|
||||
--- /dev/null
|
||||
+++ b/config/init/systemd/lxc-monitord.service.in
|
||||
@@ -0,0 +1,12 @@
|
||||
+[Unit]
|
||||
+Description=LXC Container Monitoring Daemon
|
||||
+After=syslog.service network.target
|
||||
+
|
||||
+[Service]
|
||||
+Type=simple
|
||||
+ExecStart=@LIBEXECDIR@/lxc/lxc-monitord --daemon
|
||||
+StandardOutput=syslog
|
||||
+StandardError=syslog
|
||||
+
|
||||
+[Install]
|
||||
+WantedBy=multi-user.target
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index 645a2166d..6260f483f 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -796,6 +796,7 @@ AC_CONFIG_FILES([
|
||||
config/init/systemd/lxc.service
|
||||
config/init/systemd/lxc@.service
|
||||
config/init/systemd/lxc-net.service
|
||||
+ config/init/systemd/lxc-monitord.service
|
||||
config/init/sysvinit/Makefile
|
||||
config/init/sysvinit/lxc-containers
|
||||
config/init/sysvinit/lxc-net
|
||||
diff --git a/lxc.spec.in b/lxc.spec.in
|
||||
index ec6321c33..ea6789fb6 100644
|
||||
--- a/lxc.spec.in
|
||||
+++ b/lxc.spec.in
|
||||
@@ -251,6 +251,7 @@ fi
|
||||
%{_unitdir}/lxc-net.service
|
||||
%{_unitdir}/lxc.service
|
||||
%{_unitdir}/lxc@.service
|
||||
+%{_unitdir}/lxc-monitord.service
|
||||
%else
|
||||
%{_sysconfdir}/rc.d/init.d/lxc
|
||||
%{_sysconfdir}/rc.d/init.d/lxc-net
|
||||
diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c
|
||||
index 3b931b361..d3cc35978 100644
|
||||
--- a/src/lxc/cmd/lxc_monitord.c
|
||||
+++ b/src/lxc/cmd/lxc_monitord.c
|
||||
@@ -359,17 +359,44 @@ static void lxc_monitord_sig_handler(int sig)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
- int ret, pipefd;
|
||||
+ int ret, pipefd = -1;
|
||||
char logpath[PATH_MAX];
|
||||
sigset_t mask;
|
||||
- char *lxcpath = argv[1];
|
||||
+ const char *lxcpath = NULL;
|
||||
bool mainloop_opened = false;
|
||||
bool monitord_created = false;
|
||||
+ bool persistent = false;
|
||||
struct lxc_log log;
|
||||
|
||||
- if (argc != 3) {
|
||||
+ if (argc > 1 && !strcmp(argv[1], "--daemon")) {
|
||||
+ persistent = true;
|
||||
+ --argc;
|
||||
+ ++argv;
|
||||
+ }
|
||||
+
|
||||
+ if (argc > 1) {
|
||||
+ lxcpath = argv[1];
|
||||
+ --argc;
|
||||
+ ++argv;
|
||||
+ } else {
|
||||
+ lxcpath = lxc_global_config_value("lxc.lxcpath");
|
||||
+ if (!lxcpath) {
|
||||
+ ERROR("Out of memory getting lxcpath");
|
||||
+ exit(EXIT_FAILURE);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (argc > 1) {
|
||||
+ if (lxc_safe_int(argv[1], &pipefd) < 0)
|
||||
+ exit(EXIT_FAILURE);
|
||||
+ --argc;
|
||||
+ ++argv;
|
||||
+ }
|
||||
+
|
||||
+ if (argc != 1 || (persistent != (pipefd == -1))) {
|
||||
fprintf(stderr,
|
||||
- "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n"
|
||||
+ "Usage: lxc-monitord lxcpath sync-pipe-fd\n"
|
||||
+ " lxc-monitord --daemon lxcpath\n\n"
|
||||
"NOTE: lxc-monitord is intended for use by lxc internally\n"
|
||||
" and does not need to be run by hand\n\n");
|
||||
exit(EXIT_FAILURE);
|
||||
@@ -392,9 +419,6 @@ int main(int argc, char *argv[])
|
||||
INFO("Failed to open log file %s, log will be lost", lxcpath);
|
||||
lxc_log_options_no_override();
|
||||
|
||||
- if (lxc_safe_int(argv[2], &pipefd) < 0)
|
||||
- exit(EXIT_FAILURE);
|
||||
-
|
||||
if (sigfillset(&mask) ||
|
||||
sigdelset(&mask, SIGILL) ||
|
||||
sigdelset(&mask, SIGSEGV) ||
|
||||
@@ -427,15 +451,17 @@ int main(int argc, char *argv[])
|
||||
goto on_error;
|
||||
monitord_created = true;
|
||||
|
||||
- /* sync with parent, we're ignoring the return from write
|
||||
- * because regardless if it works or not, the following
|
||||
- * close will sync us with the parent process. the
|
||||
- * if-empty-statement construct is to quiet the
|
||||
- * warn-unused-result warning.
|
||||
- */
|
||||
- if (lxc_write_nointr(pipefd, "S", 1))
|
||||
- ;
|
||||
- close(pipefd);
|
||||
+ if (pipefd != -1) {
|
||||
+ /* sync with parent, we're ignoring the return from write
|
||||
+ * because regardless if it works or not, the following
|
||||
+ * close will sync us with the parent process. the
|
||||
+ * if-empty-statement construct is to quiet the
|
||||
+ * warn-unused-result warning.
|
||||
+ */
|
||||
+ if (lxc_write_nointr(pipefd, "S", 1))
|
||||
+ ;
|
||||
+ close(pipefd);
|
||||
+ }
|
||||
|
||||
if (lxc_monitord_mainloop_add(&monitor)) {
|
||||
ERROR("Failed to add mainloop handlers");
|
||||
@@ -446,7 +472,7 @@ int main(int argc, char *argv[])
|
||||
lxc_raw_getpid(), monitor.lxcpath);
|
||||
|
||||
for (;;) {
|
||||
- ret = lxc_mainloop(&monitor.descr, 1000 * 30);
|
||||
+ ret = lxc_mainloop(&monitor.descr, persistent ? -1 : 1000 * 30);
|
||||
if (ret) {
|
||||
ERROR("mainloop returned an error");
|
||||
break;
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,50 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Wed, 2 Jan 2019 14:37:58 +0100
|
||||
Subject: [PATCH] PVE: [Config] Disable lxc.monitor cgroup
|
||||
|
||||
When not using relative cgroups, the lxc.monitor cgroup makes lxc unusable
|
||||
within systemd service files as the service cgroup becomes
|
||||
empty.
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/start.c | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/src/lxc/start.c b/src/lxc/start.c
|
||||
index a9a07bc83..0169cf8e3 100644
|
||||
--- a/src/lxc/start.c
|
||||
+++ b/src/lxc/start.c
|
||||
@@ -2026,17 +2026,17 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
|
||||
goto out_fini_nonet;
|
||||
}
|
||||
|
||||
- if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
|
||||
- ERROR("Failed to create monitor cgroup");
|
||||
- ret = -1;
|
||||
- goto out_fini_nonet;
|
||||
- }
|
||||
-
|
||||
- if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
|
||||
- ERROR("Failed to enter monitor cgroup");
|
||||
- ret = -1;
|
||||
- goto out_fini_nonet;
|
||||
- }
|
||||
+ //if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
|
||||
+ // ERROR("Failed to create monitor cgroup");
|
||||
+ // ret = -1;
|
||||
+ // goto out_fini_nonet;
|
||||
+ //}
|
||||
+
|
||||
+ //if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
|
||||
+ // ERROR("Failed to enter monitor cgroup");
|
||||
+ // ret = -1;
|
||||
+ // goto out_fini_nonet;
|
||||
+ //}
|
||||
|
||||
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
|
||||
/* If the backing store is a device, mount it here and now. */
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,534 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Wed, 28 Mar 2018 13:37:28 +0200
|
||||
Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup
|
||||
root
|
||||
|
||||
When cgroup namespaces are enabled a privileged container
|
||||
with mixed cgroups has full write access to its own root
|
||||
cgroup effectively allowing it to overwrite values written
|
||||
from the outside or configured via lxc.cgroup.*.
|
||||
|
||||
This patch causes an additional 'cgns/' directory to be
|
||||
created in all cgroups if cgroup namespaces and cgfsng are
|
||||
being used in order to combat this.
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/cgroups/cgfsng.c | 80 +++++++++++++++++++++++++++++-------
|
||||
src/lxc/cgroups/cgroup.h | 18 +++++++--
|
||||
src/lxc/commands.c | 87 ++++++++++++++++++++++++++++++----------
|
||||
src/lxc/commands.h | 2 +
|
||||
src/lxc/criu.c | 4 +-
|
||||
src/lxc/start.c | 28 +++++++++----
|
||||
6 files changed, 171 insertions(+), 48 deletions(-)
|
||||
|
||||
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
|
||||
index 1e6a45cff..c09b4ea71 100644
|
||||
--- a/src/lxc/cgroups/cgfsng.c
|
||||
+++ b/src/lxc/cgroups/cgfsng.c
|
||||
@@ -808,6 +808,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
|
||||
new->mountpoint = mountpoint;
|
||||
new->container_base_path = container_base_path;
|
||||
new->container_full_path = NULL;
|
||||
+ new->container_inner_path = NULL;
|
||||
new->monitor_full_path = NULL;
|
||||
new->version = type;
|
||||
new->cgroup2_chown = NULL;
|
||||
@@ -1048,6 +1049,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies,
|
||||
|
||||
free(h->container_full_path);
|
||||
h->container_full_path = NULL;
|
||||
+
|
||||
+ free(h->container_inner_path);
|
||||
+ h->container_inner_path = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1059,6 +1063,7 @@ struct generic_userns_exec_data {
|
||||
struct lxc_conf *conf;
|
||||
uid_t origuid; /* target uid in parent namespace */
|
||||
char *path;
|
||||
+ bool inner;
|
||||
};
|
||||
|
||||
static int cgroup_rmdir_wrapper(void *data)
|
||||
@@ -1104,6 +1109,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
|
||||
wrap.container_cgroup = ops->container_cgroup;
|
||||
wrap.hierarchies = ops->hierarchies;
|
||||
wrap.conf = handler->conf;
|
||||
+ wrap.inner = false;
|
||||
|
||||
if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
|
||||
ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
|
||||
@@ -1306,17 +1312,26 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
|
||||
return cg_unified_create_cgroup(h, cgname);
|
||||
}
|
||||
|
||||
-static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
|
||||
+static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
|
||||
{
|
||||
int ret;
|
||||
+ char *path;
|
||||
|
||||
- if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
|
||||
+ if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
|
||||
ERROR("Failed to handle legacy cpuset controller");
|
||||
return false;
|
||||
}
|
||||
|
||||
- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
|
||||
- ret = mkdir_eexist_on_last(h->container_full_path, 0755);
|
||||
+ if (inner) {
|
||||
+ path = must_make_path(h->container_full_path, CGROUP_NAMESPACE_SUBDIR, NULL);
|
||||
+ h->container_inner_path = path;
|
||||
+ ret = mkdir(path, 0755);
|
||||
+ } else {
|
||||
+ path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
|
||||
+ h->container_full_path = path;
|
||||
+ ret = mkdir_eexist_on_last(path, 0755);
|
||||
+ }
|
||||
+
|
||||
if (ret < 0) {
|
||||
ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
|
||||
return false;
|
||||
@@ -1408,11 +1423,29 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
|
||||
return true;
|
||||
}
|
||||
|
||||
+static inline bool cgfsng_create_inner(struct cgroup_ops *ops)
|
||||
+{
|
||||
+ size_t i;
|
||||
+ bool ret = true;
|
||||
+ char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
|
||||
+ for (i = 0; ops->hierarchies[i]; i++) {
|
||||
+ if (!container_create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) {
|
||||
+ SYSERROR("Failed to create %s namespace subdirectory: %s",
|
||||
+ ops->hierarchies[i]->container_full_path, strerror(errno));
|
||||
+ ret = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ free(cgname);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
|
||||
* next cgroup_pattern-1, -2, ..., -999.
|
||||
*/
|
||||
__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
|
||||
- struct lxc_handler *handler)
|
||||
+ struct lxc_handler *handler,
|
||||
+ bool inner)
|
||||
{
|
||||
__do_free char *container_cgroup = NULL, *tmp = NULL;
|
||||
int i;
|
||||
@@ -1422,7 +1455,12 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
|
||||
struct lxc_conf *conf = handler->conf;
|
||||
|
||||
if (ops->container_cgroup)
|
||||
+ return inner ? cgfsng_create_inner(ops) : false;
|
||||
+
|
||||
+ if (inner) {
|
||||
+ ERROR("cgfsng_create called twice for inner cgroup");
|
||||
return false;
|
||||
+ }
|
||||
|
||||
if (!conf)
|
||||
return false;
|
||||
@@ -1453,7 +1491,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
|
||||
|
||||
for (i = 0; ops->hierarchies[i]; i++) {
|
||||
if (!container_create_path_for_hierarchy(ops->hierarchies[i],
|
||||
- container_cgroup)) {
|
||||
+ container_cgroup, false)) {
|
||||
ERROR("Failed to create cgroup \"%s\"",
|
||||
ops->hierarchies[i]->container_full_path);
|
||||
for (int j = 0; j < i; j++)
|
||||
@@ -1475,7 +1513,8 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
|
||||
}
|
||||
|
||||
__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
|
||||
- bool monitor)
|
||||
+ bool monitor,
|
||||
+ bool inner)
|
||||
{
|
||||
int len;
|
||||
char pidstr[INTTYPE_TO_STRLEN(pid_t)];
|
||||
@@ -1494,6 +1533,9 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
|
||||
if (monitor)
|
||||
path = must_make_path(ops->hierarchies[i]->monitor_full_path,
|
||||
"cgroup.procs", NULL);
|
||||
+ else if (inner)
|
||||
+ path = must_make_path(ops->hierarchies[i]->container_inner_path,
|
||||
+ "cgroup.procs", NULL);
|
||||
else
|
||||
path = must_make_path(ops->hierarchies[i]->container_full_path,
|
||||
"cgroup.procs", NULL);
|
||||
@@ -1509,12 +1551,12 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
|
||||
|
||||
__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
|
||||
{
|
||||
- return __do_cgroup_enter(ops, pid, true);
|
||||
+ return __do_cgroup_enter(ops, pid, true, false);
|
||||
}
|
||||
|
||||
-static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
|
||||
+static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid, bool inner)
|
||||
{
|
||||
- return __do_cgroup_enter(ops, pid, false);
|
||||
+ return __do_cgroup_enter(ops, pid, false, inner);
|
||||
}
|
||||
|
||||
static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
|
||||
@@ -1578,8 +1620,12 @@ static int chown_cgroup_wrapper(void *data)
|
||||
|
||||
for (int i = 0; arg->hierarchies[i]; i++) {
|
||||
__do_free char *fullpath = NULL;
|
||||
+ __do_free char *inner_guard = NULL;
|
||||
char *path = arg->hierarchies[i]->container_full_path;
|
||||
|
||||
+ if (arg->inner)
|
||||
+ path = inner_guard = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
|
||||
+
|
||||
ret = chowmod(path, destuid, nsgid, 0775);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
@@ -1612,7 +1658,8 @@ static int chown_cgroup_wrapper(void *data)
|
||||
}
|
||||
|
||||
__cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
|
||||
- struct lxc_conf *conf)
|
||||
+ struct lxc_conf *conf,
|
||||
+ bool inner)
|
||||
{
|
||||
struct generic_userns_exec_data wrap;
|
||||
|
||||
@@ -1626,6 +1673,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
|
||||
wrap.path = NULL;
|
||||
wrap.hierarchies = ops->hierarchies;
|
||||
wrap.conf = conf;
|
||||
+ wrap.inner = inner;
|
||||
|
||||
if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
|
||||
"chown_cgroup_wrapper") < 0) {
|
||||
@@ -2100,7 +2148,8 @@ __cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
|
||||
}
|
||||
|
||||
__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
|
||||
- const char *controller)
|
||||
+ const char *controller,
|
||||
+ bool inner)
|
||||
{
|
||||
struct hierarchy *h;
|
||||
|
||||
@@ -2111,6 +2160,9 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+ if (inner)
|
||||
+ return h->container_inner_path ? h->container_inner_path + strlen(h->mountpoint) : NULL;
|
||||
+
|
||||
return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
|
||||
}
|
||||
|
||||
@@ -2143,7 +2195,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
|
||||
size_t len;
|
||||
int fret = -1, idx = 0;
|
||||
|
||||
- container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
|
||||
+ container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller);
|
||||
/* not running */
|
||||
if (!container_cgroup)
|
||||
return 0;
|
||||
@@ -2220,7 +2272,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
|
||||
continue;
|
||||
}
|
||||
|
||||
- path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
|
||||
+ path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
|
||||
/* not running */
|
||||
if (!path)
|
||||
continue;
|
||||
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
|
||||
index f3f0f6726..35d207feb 100644
|
||||
--- a/src/lxc/cgroups/cgroup.h
|
||||
+++ b/src/lxc/cgroups/cgroup.h
|
||||
@@ -32,6 +32,12 @@
|
||||
#define MONITOR_CGROUP "lxc.monitor"
|
||||
#define PIVOT_CGROUP "lxc.pivot"
|
||||
|
||||
+/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace
|
||||
+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
|
||||
+ * order to prevent it from accessing the outer limiting cgroup.
|
||||
+ */
|
||||
+#define CGROUP_NAMESPACE_SUBDIR "cgns"
|
||||
+
|
||||
struct lxc_handler;
|
||||
struct lxc_conf;
|
||||
struct lxc_list;
|
||||
@@ -72,6 +78,9 @@ typedef enum {
|
||||
* @monitor_full_path
|
||||
* - The full path to the monitor's cgroup.
|
||||
*
|
||||
+ * @container_inner_path
|
||||
+ * - The full path to the container's inner cgroup when protect_limits is used.
|
||||
+ *
|
||||
* @version
|
||||
* - legacy hierarchy
|
||||
* If the hierarchy is a legacy hierarchy this will be set to
|
||||
@@ -90,6 +99,7 @@ struct hierarchy {
|
||||
char *mountpoint;
|
||||
char *container_base_path;
|
||||
char *container_full_path;
|
||||
+ char *container_inner_path;
|
||||
char *monitor_full_path;
|
||||
int version;
|
||||
};
|
||||
@@ -144,9 +154,9 @@ struct cgroup_ops {
|
||||
void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
|
||||
bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
|
||||
bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
|
||||
- bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
|
||||
- bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
|
||||
- const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
|
||||
+ bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner);
|
||||
+ bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid, bool inner);
|
||||
+ const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner);
|
||||
bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
|
||||
int (*num_hierarchies)(struct cgroup_ops *ops);
|
||||
bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
|
||||
@@ -158,7 +168,7 @@ struct cgroup_ops {
|
||||
bool (*unfreeze)(struct cgroup_ops *ops);
|
||||
bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
|
||||
bool with_devices);
|
||||
- bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
|
||||
+ bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner);
|
||||
bool (*attach)(struct cgroup_ops *ops, const char *name,
|
||||
const char *lxcpath, pid_t pid);
|
||||
bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
|
||||
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
|
||||
index 90e3c5863..93406bb7e 100644
|
||||
--- a/src/lxc/commands.c
|
||||
+++ b/src/lxc/commands.c
|
||||
@@ -425,20 +425,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
|
||||
return lxc_cmd_rsp_send(fd, &rsp);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
|
||||
- * particular subsystem. This is the cgroup path relative to the root
|
||||
- * of the cgroup filesystem.
|
||||
- *
|
||||
- * @name : name of container to connect to
|
||||
- * @lxcpath : the lxcpath in which the container is running
|
||||
- * @subsystem : the subsystem being asked about
|
||||
- *
|
||||
- * Returns the path on success, NULL on failure. The caller must free() the
|
||||
- * returned path.
|
||||
- */
|
||||
-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
- const char *subsystem)
|
||||
+char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
+ const char *subsystem, bool inner)
|
||||
{
|
||||
int ret, stopped;
|
||||
struct lxc_cmd_rr cmd = {
|
||||
@@ -451,8 +439,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
|
||||
cmd.req.data = subsystem;
|
||||
cmd.req.datalen = 0;
|
||||
- if (subsystem)
|
||||
- cmd.req.datalen = strlen(subsystem) + 1;
|
||||
+ if (subsystem) {
|
||||
+ size_t subsyslen = strlen(subsystem);
|
||||
+ if (inner) {
|
||||
+ char *data = alloca(subsyslen+2);
|
||||
+ memcpy(data, subsystem, subsyslen+1);
|
||||
+ data[subsyslen+1] = 1;
|
||||
+ cmd.req.datalen = subsyslen+2,
|
||||
+ cmd.req.data = data;
|
||||
+ } else {
|
||||
+ cmd.req.datalen = subsyslen+1;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
|
||||
if (ret < 0)
|
||||
@@ -467,6 +465,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
return cmd.rsp.data;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
|
||||
+ * particular subsystem. This is the cgroup path relative to the root
|
||||
+ * of the cgroup filesystem.
|
||||
+ *
|
||||
+ * @name : name of container to connect to
|
||||
+ * @lxcpath : the lxcpath in which the container is running
|
||||
+ * @subsystem : the subsystem being asked about
|
||||
+ *
|
||||
+ * Returns the path on success, NULL on failure. The caller must free() the
|
||||
+ * returned path.
|
||||
+ */
|
||||
+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
+ const char *subsystem)
|
||||
+{
|
||||
+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
|
||||
+ * for a particular subsystem. This is the cgroup path relative to the root
|
||||
+ * of the cgroup filesystem.
|
||||
+ *
|
||||
+ * @name : name of container to connect to
|
||||
+ * @lxcpath : the lxcpath in which the container is running
|
||||
+ * @subsystem : the subsystem being asked about
|
||||
+ *
|
||||
+ * Returns the path on success, NULL on failure. The caller must free() the
|
||||
+ * returned path.
|
||||
+ */
|
||||
+char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
|
||||
+ const char *subsystem)
|
||||
+{
|
||||
+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
|
||||
+}
|
||||
+
|
||||
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
|
||||
struct lxc_handler *handler,
|
||||
struct lxc_epoll_descr *descr)
|
||||
@@ -475,10 +509,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
|
||||
struct lxc_cmd_rsp rsp;
|
||||
struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
|
||||
|
||||
- if (req->datalen > 0)
|
||||
- path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
|
||||
- else
|
||||
- path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
|
||||
+ if (req->datalen > 0) {
|
||||
+ const char *subsystem;
|
||||
+ size_t subsyslen;
|
||||
+ bool inner = false;
|
||||
+ subsystem = req->data;
|
||||
+ subsyslen = strlen(subsystem);
|
||||
+ if (req->datalen == subsyslen+2)
|
||||
+ inner = (subsystem[subsyslen+1] == 1);
|
||||
+
|
||||
+ path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner);
|
||||
+ } else {
|
||||
+ // FIXME: cgroup separation for cgroup v2 cannot be handled
|
||||
+ // like we used to do v1 here... need to figure this out...
|
||||
+ path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false);
|
||||
+ }
|
||||
if (!path)
|
||||
return -1;
|
||||
|
||||
@@ -653,7 +698,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
|
||||
* lxc_unfreeze() would do another cmd (GET_CGROUP) which would
|
||||
* deadlock us.
|
||||
*/
|
||||
- if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer"))
|
||||
+ if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer", false))
|
||||
return 0;
|
||||
|
||||
if (cgroup_ops->unfreeze(cgroup_ops))
|
||||
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
|
||||
index d7d0c6096..042892a42 100644
|
||||
--- a/src/lxc/commands.h
|
||||
+++ b/src/lxc/commands.h
|
||||
@@ -89,6 +89,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
|
||||
*/
|
||||
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
const char *subsystem);
|
||||
+extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
|
||||
+ const char *lxcpath, const char *subsystem);
|
||||
extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
|
||||
extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
|
||||
extern char *lxc_cmd_get_name(const char *hashed_sock);
|
||||
diff --git a/src/lxc/criu.c b/src/lxc/criu.c
|
||||
index 86f6f1836..15a703c4f 100644
|
||||
--- a/src/lxc/criu.c
|
||||
+++ b/src/lxc/criu.c
|
||||
@@ -332,7 +332,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
|
||||
} else {
|
||||
const char *p;
|
||||
|
||||
- p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
|
||||
+ p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false);
|
||||
if (!p) {
|
||||
ERROR("failed to get cgroup path for %s", controllers[0]);
|
||||
goto err;
|
||||
@@ -975,7 +975,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
|
||||
goto out_fini_handler;
|
||||
handler->cgroup_ops = cgroup_ops;
|
||||
|
||||
- if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
|
||||
+ if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
|
||||
ERROR("failed creating groups");
|
||||
goto out_fini_handler;
|
||||
}
|
||||
diff --git a/src/lxc/start.c b/src/lxc/start.c
|
||||
index 0169cf8e3..db0625af5 100644
|
||||
--- a/src/lxc/start.c
|
||||
+++ b/src/lxc/start.c
|
||||
@@ -1726,7 +1726,7 @@ static int lxc_spawn(struct lxc_handler *handler)
|
||||
}
|
||||
}
|
||||
|
||||
- if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
|
||||
+ if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
|
||||
ERROR("Failed creating cgroups");
|
||||
goto out_delete_net;
|
||||
}
|
||||
@@ -1841,10 +1841,10 @@ static int lxc_spawn(struct lxc_handler *handler)
|
||||
goto out_delete_net;
|
||||
}
|
||||
|
||||
- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
|
||||
+ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, false))
|
||||
goto out_delete_net;
|
||||
|
||||
- if (!cgroup_ops->chown(cgroup_ops, handler->conf))
|
||||
+ if (!cgroup_ops->chown(cgroup_ops, handler->conf, false))
|
||||
goto out_delete_net;
|
||||
|
||||
/* If not done yet, we're now ready to preserve the network namespace */
|
||||
@@ -1902,16 +1902,30 @@ static int lxc_spawn(struct lxc_handler *handler)
|
||||
}
|
||||
}
|
||||
|
||||
- ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
|
||||
- if (ret < 0)
|
||||
- goto out_delete_net;
|
||||
-
|
||||
if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
|
||||
ERROR("Failed to setup legacy device cgroup controller limits");
|
||||
goto out_delete_net;
|
||||
}
|
||||
TRACE("Set up legacy device cgroup controller limits");
|
||||
|
||||
+ if (cgns_supported()) {
|
||||
+ if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
|
||||
+ ERROR("failed to create inner cgroup separation layer");
|
||||
+ goto out_delete_net;
|
||||
+ }
|
||||
+ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
|
||||
+ ERROR("failed to enter inner cgroup separation layer");
|
||||
+ goto out_delete_net;
|
||||
+ }
|
||||
+ if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
|
||||
+ ERROR("failed chown inner cgroup separation layer");
|
||||
+ goto out_delete_net;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE))
|
||||
+ goto out_delete_net;
|
||||
+
|
||||
if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
|
||||
/* Now we're ready to preserve the cgroup namespace */
|
||||
ret = lxc_try_preserve_ns(handler->pid, "cgroup");
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,97 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Wed, 28 Mar 2018 13:41:46 +0200
|
||||
Subject: [PATCH] PVE: [Up] start/initutils: make cgroupns separation level
|
||||
configurable
|
||||
|
||||
Adds a new global config variable `lxc.cgroup.protect_limits`
|
||||
which controls whether a separation directory for cgroup
|
||||
namespaces should be used.
|
||||
Can be empty, "privileged", "unprivileged" or "both".
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/initutils.c | 17 +++++++++--------
|
||||
src/lxc/initutils.h | 1 +
|
||||
src/lxc/start.c | 25 ++++++++++++++-----------
|
||||
3 files changed, 24 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c
|
||||
index da3363294..5e33afc58 100644
|
||||
--- a/src/lxc/initutils.c
|
||||
+++ b/src/lxc/initutils.c
|
||||
@@ -64,14 +64,15 @@ static char *copy_global_config_value(char *p)
|
||||
const char *lxc_global_config_value(const char *option_name)
|
||||
{
|
||||
static const char * const options[][2] = {
|
||||
- { "lxc.bdev.lvm.vg", DEFAULT_VG },
|
||||
- { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
|
||||
- { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
|
||||
- { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
|
||||
- { "lxc.lxcpath", NULL },
|
||||
- { "lxc.default_config", NULL },
|
||||
- { "lxc.cgroup.pattern", NULL },
|
||||
- { "lxc.cgroup.use", NULL },
|
||||
+ { "lxc.bdev.lvm.vg", DEFAULT_VG },
|
||||
+ { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
|
||||
+ { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
|
||||
+ { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
|
||||
+ { "lxc.lxcpath", NULL },
|
||||
+ { "lxc.default_config", NULL },
|
||||
+ { "lxc.cgroup.pattern", NULL },
|
||||
+ { "lxc.cgroup.use", NULL },
|
||||
+ { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT },
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
|
||||
index 6bf23a706..b542e6015 100644
|
||||
--- a/src/lxc/initutils.h
|
||||
+++ b/src/lxc/initutils.h
|
||||
@@ -42,6 +42,7 @@
|
||||
#define DEFAULT_THIN_POOL "lxc"
|
||||
#define DEFAULT_ZFSROOT "lxc"
|
||||
#define DEFAULT_RBDPOOL "lxc"
|
||||
+#define DEFAULT_CGPROTECT "privileged"
|
||||
|
||||
#ifndef PR_SET_MM
|
||||
#define PR_SET_MM 35
|
||||
diff --git a/src/lxc/start.c b/src/lxc/start.c
|
||||
index db0625af5..0ee7f9636 100644
|
||||
--- a/src/lxc/start.c
|
||||
+++ b/src/lxc/start.c
|
||||
@@ -1909,17 +1909,20 @@ static int lxc_spawn(struct lxc_handler *handler)
|
||||
TRACE("Set up legacy device cgroup controller limits");
|
||||
|
||||
if (cgns_supported()) {
|
||||
- if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
|
||||
- ERROR("failed to create inner cgroup separation layer");
|
||||
- goto out_delete_net;
|
||||
- }
|
||||
- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
|
||||
- ERROR("failed to enter inner cgroup separation layer");
|
||||
- goto out_delete_net;
|
||||
- }
|
||||
- if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
|
||||
- ERROR("failed chown inner cgroup separation layer");
|
||||
- goto out_delete_net;
|
||||
+ const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits");
|
||||
+ if (!strcmp(tmp, "both") || !strcmp(tmp, wants_to_map_ids ? "unprivileged" : "privileged")) {
|
||||
+ if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
|
||||
+ ERROR("failed to create inner cgroup separation layer");
|
||||
+ goto out_delete_net;
|
||||
+ }
|
||||
+ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
|
||||
+ ERROR("failed to enter inner cgroup separation layer");
|
||||
+ goto out_delete_net;
|
||||
+ }
|
||||
+ if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
|
||||
+ ERROR("failed chown inner cgroup separation layer");
|
||||
+ goto out_delete_net;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,43 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Fri, 23 Dec 2016 15:57:24 +0100
|
||||
Subject: [PATCH] PVE: [Config] namespace separation
|
||||
|
||||
* rename cgroup namespace directory to ns
|
||||
* set lxc.cgroup.protect_limits default to 'both'
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/cgroups/cgroup.h | 2 +-
|
||||
src/lxc/initutils.h | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
|
||||
index 35d207feb..be9df33a2 100644
|
||||
--- a/src/lxc/cgroups/cgroup.h
|
||||
+++ b/src/lxc/cgroups/cgroup.h
|
||||
@@ -36,7 +36,7 @@
|
||||
* will be moved into an additional subdirectory "cgns/" inside the cgroup in
|
||||
* order to prevent it from accessing the outer limiting cgroup.
|
||||
*/
|
||||
-#define CGROUP_NAMESPACE_SUBDIR "cgns"
|
||||
+#define CGROUP_NAMESPACE_SUBDIR "ns"
|
||||
|
||||
struct lxc_handler;
|
||||
struct lxc_conf;
|
||||
diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
|
||||
index b542e6015..78d3f2b10 100644
|
||||
--- a/src/lxc/initutils.h
|
||||
+++ b/src/lxc/initutils.h
|
||||
@@ -42,7 +42,7 @@
|
||||
#define DEFAULT_THIN_POOL "lxc"
|
||||
#define DEFAULT_ZFSROOT "lxc"
|
||||
#define DEFAULT_RBDPOOL "lxc"
|
||||
-#define DEFAULT_CGPROTECT "privileged"
|
||||
+#define DEFAULT_CGPROTECT "both"
|
||||
|
||||
#ifndef PR_SET_MM
|
||||
#define PR_SET_MM 35
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,25 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Wed, 10 Jul 2019 14:29:54 +0200
|
||||
Subject: [PATCH] init: add ExecReload to lxc.service to only reload profiles
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
config/init/systemd/lxc.service.in | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
|
||||
index 77541917e..e4c086e0a 100644
|
||||
--- a/config/init/systemd/lxc.service.in
|
||||
+++ b/config/init/systemd/lxc.service.in
|
||||
@@ -10,6 +10,7 @@ RemainAfterExit=yes
|
||||
ExecStartPre=@LIBEXECDIR@/lxc/lxc-apparmor-load
|
||||
ExecStart=@LIBEXECDIR@/lxc/lxc-containers start
|
||||
ExecStop=@LIBEXECDIR@/lxc/lxc-containers stop
|
||||
+ExecReload=@LIBEXECDIR@/lxc/lxc-apparmor-load
|
||||
# Environment=BOOTUP=serial
|
||||
# Environment=CONSOLETYPE=serial
|
||||
Delegate=yes
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,169 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Fri, 2 Aug 2019 12:57:42 +0200
|
||||
Subject: [PATCH] apparmor: generate ro,bind,remount rule list
|
||||
|
||||
and update to changes based on lxd
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/lsm/apparmor.c | 114 ++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 100 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
|
||||
index e32b12531..47f825866 100644
|
||||
--- a/src/lxc/lsm/apparmor.c
|
||||
+++ b/src/lxc/lsm/apparmor.c
|
||||
@@ -149,6 +149,16 @@ static const char AA_PROFILE_BASE[] =
|
||||
"# mount options=(rw,make-unbindable) -> **,\n"
|
||||
"# mount options=(rw,make-runbindable) -> **,\n"
|
||||
"\n"
|
||||
+"# Allow limited modification of mount propagation\n"
|
||||
+" mount options=(rw,make-slave) -> /,\n"
|
||||
+" mount options=(rw,make-rslave) -> /,\n"
|
||||
+" mount options=(rw,make-shared) -> /,\n"
|
||||
+" mount options=(rw,make-rshared) -> /,\n"
|
||||
+" mount options=(rw,make-private) -> /,\n"
|
||||
+" mount options=(rw,make-rprivate) -> /,\n"
|
||||
+" mount options=(rw,make-unbindable) -> /,\n"
|
||||
+" mount options=(rw,make-runbindable) -> /,\n"
|
||||
+"\n"
|
||||
" # allow bind-mounts of anything except /proc, /sys and /dev\n"
|
||||
" mount options=(rw,bind) /[^spd]*{,/**},\n"
|
||||
" mount options=(rw,bind) /d[^e]*{,/**},\n"
|
||||
@@ -167,15 +177,18 @@ static const char AA_PROFILE_BASE[] =
|
||||
" mount options=(rw,bind) /sy[^s]*{,/**},\n"
|
||||
" mount options=(rw,bind) /sys?*{,/**},\n"
|
||||
"\n"
|
||||
-" # allow various ro-bind-*re*-mounts\n"
|
||||
-" mount options=(ro,remount,bind),\n"
|
||||
-" mount options=(ro,remount,bind,nosuid),\n"
|
||||
-" mount options=(ro,remount,bind,noexec),\n"
|
||||
-" mount options=(ro,remount,bind,nodev),\n"
|
||||
-" mount options=(ro,remount,bind,nosuid,noexec),\n"
|
||||
-" mount options=(ro,remount,bind,noexec,nodev),\n"
|
||||
-" mount options=(ro,remount,bind,nodev,nosuid),\n"
|
||||
-" mount options=(ro,remount,bind,nosuid,noexec,nodev),\n"
|
||||
+" # Allow rbind-mounts of anything except /, /dev, /proc and /sys\n"
|
||||
+" mount options=(rw,rbind) /[^spd]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /d[^e]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /de[^v]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /dev?*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /p[^r]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /pr[^o]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /pro[^c]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /proc?*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /s[^y]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /sy[^s]*{,/**},\n"
|
||||
+" mount options=(rw,rbind) /sys?*{,/**},\n"
|
||||
"\n"
|
||||
" # allow moving mounts except for /proc, /sys and /dev\n"
|
||||
" mount options=(rw,move) /[^spd]*{,/**},\n"
|
||||
@@ -341,12 +354,13 @@ static const char AA_PROFILE_NESTING_BASE[] =
|
||||
"\n"
|
||||
" mount fstype=proc -> /usr/lib/*/lxc/**,\n"
|
||||
" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
|
||||
-" mount options=(rw,bind),\n"
|
||||
-" mount options=(rw,rbind),\n"
|
||||
-" mount options=(rw,make-rshared),\n"
|
||||
"\n"
|
||||
- /* FIXME: What's the state here on apparmor's side? */
|
||||
-" # there doesn't seem to be a way to ask for:\n"
|
||||
+" # Allow nested LXD\n"
|
||||
+" mount none -> /var/lib/lxd/shmounts/,\n"
|
||||
+" mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,\n"
|
||||
+" mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,\n"
|
||||
+"\n"
|
||||
+" # FIXME: There doesn't seem to be a way to ask for:\n"
|
||||
" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
|
||||
" # as we always get mount to $cdir/proc/sys with those flags denied\n"
|
||||
" # So allow all mounts until that is straightened out:\n"
|
||||
@@ -648,6 +662,76 @@ static bool is_privileged(struct lxc_conf *conf)
|
||||
return lxc_list_empty(&conf->id_map);
|
||||
}
|
||||
|
||||
+static const char* AA_ALL_DEST_PATH_LIST[] = {
|
||||
+ " -> /[^spd]*{,/**},\n",
|
||||
+ " -> /d[^e]*{,/**},\n",
|
||||
+ " -> /de[^v]*{,/**},\n",
|
||||
+ " -> /dev/.[^l]*{,/**},\n",
|
||||
+ " -> /dev/.l[^x]*{,/**},\n",
|
||||
+ " -> /dev/.lx[^c]*{,/**},\n",
|
||||
+ " -> /dev/.lxc?*{,/**},\n",
|
||||
+ " -> /dev/[^.]*{,/**},\n",
|
||||
+ " -> /dev?*{,/**},\n",
|
||||
+ " -> /p[^r]*{,/**},\n",
|
||||
+ " -> /pr[^o]*{,/**},\n",
|
||||
+ " -> /pro[^c]*{,/**},\n",
|
||||
+ " -> /proc?*{,/**},\n",
|
||||
+ " -> /s[^y]*{,/**},\n",
|
||||
+ " -> /sy[^s]*{,/**},\n",
|
||||
+ " -> /sys?*{,/**},\n",
|
||||
+ NULL,
|
||||
+};
|
||||
+
|
||||
+static void append_remount_rule(char **profile, size_t *size, const char *rule)
|
||||
+{
|
||||
+ size_t rule_len = strlen(rule);
|
||||
+
|
||||
+ for (const char **dest = AA_ALL_DEST_PATH_LIST; *dest; ++dest) {
|
||||
+ must_append_sized(profile, size, rule, rule_len);
|
||||
+ must_append_sized(profile, size, *dest, strlen(*dest));
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void append_all_remount_rules(char **profile, size_t *size)
|
||||
+{
|
||||
+ must_append_sized(profile, size,
|
||||
+ "# allow various ro-bind-*re*mounts\n",
|
||||
+ sizeof("# allow various ro-bind-*re*mounts\n")-1);
|
||||
+
|
||||
+ static struct mntopt_t {
|
||||
+ const char *opt;
|
||||
+ size_t len;
|
||||
+ } mnt_opt_list[] = {
|
||||
+ { ",nodev", sizeof(",nodev")-1 },
|
||||
+ { ",nosuid", sizeof(",nosuid")-1 },
|
||||
+ { ",noexec", sizeof(",noexec")-1 },
|
||||
+ };
|
||||
+
|
||||
+ const size_t opt_count = sizeof(mnt_opt_list) / sizeof(mnt_opt_list[0]);
|
||||
+
|
||||
+ char buf[128] = "mount options=(ro,remount,bind";
|
||||
+ const size_t start = strlen(buf);
|
||||
+ for (size_t i = 0; i != 1 << opt_count; ++i) {
|
||||
+ size_t at = start;
|
||||
+ unsigned opt_bit = 1;
|
||||
+
|
||||
+ for (size_t o = 0; o != opt_count; ++o, opt_bit <<= 1) {
|
||||
+ if (i & opt_bit) {
|
||||
+ struct mntopt_t *opt = &mnt_opt_list[o];
|
||||
+ memcpy(&buf[at], opt->opt, opt->len);
|
||||
+ at += opt->len;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ memcpy(&buf[at], ")", sizeof(")"));
|
||||
+ append_remount_rule(profile, size, buf);
|
||||
+ memcpy(&buf[at], ",noatime)", sizeof(",noatime)"));
|
||||
+ append_remount_rule(profile, size, buf);
|
||||
+ memcpy(&buf[at], ",strictatime)", sizeof(",strictatime)"));
|
||||
+ append_remount_rule(profile, size, buf);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
|
||||
{
|
||||
char *profile, *profile_name_full;
|
||||
@@ -665,6 +749,8 @@ static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxc
|
||||
must_append_sized(&profile, &size, AA_PROFILE_BASE,
|
||||
STRARRAYLEN(AA_PROFILE_BASE));
|
||||
|
||||
+ append_all_remount_rules(&profile, &size);
|
||||
+
|
||||
if (aa_supports_unix)
|
||||
must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
|
||||
STRARRAYLEN(AA_PROFILE_UNIX_SOCKETS));
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,27 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Date: Wed, 23 Oct 2019 10:53:21 +0200
|
||||
Subject: [PATCH] apparmor: Prevent writes to /proc/acpi/**
|
||||
|
||||
Same as #3117.
|
||||
|
||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
---
|
||||
src/lxc/lsm/apparmor.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
|
||||
index 47f825866..8aebb21af 100644
|
||||
--- a/src/lxc/lsm/apparmor.c
|
||||
+++ b/src/lxc/lsm/apparmor.c
|
||||
@@ -121,6 +121,7 @@ static const char AA_PROFILE_BASE[] =
|
||||
" # block some other dangerous paths\n"
|
||||
" deny @{PROC}/kcore rwklx,\n"
|
||||
" deny @{PROC}/sysrq-trigger rwklx,\n"
|
||||
+" deny @{PROC}/acpi/** rwklx,\n"
|
||||
"\n"
|
||||
" # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
|
||||
" # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
|
||||
--
|
||||
2.20.1
|
||||
|
13
debian/patches/series
vendored
13
debian/patches/series
vendored
@ -1,12 +1,3 @@
|
||||
pve/0001-PVE-Config-lxc.service-start-after-a-potential-syslo.patch
|
||||
pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch
|
||||
pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch
|
||||
pve/0004-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch
|
||||
pve/0005-PVE-Config-Disable-lxc.monitor-cgroup.patch
|
||||
pve/0006-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
|
||||
pve/0007-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch
|
||||
pve/0008-PVE-Config-namespace-separation.patch
|
||||
pve/0009-PVE-Config-attach-always-use-getent.patch
|
||||
pve/0010-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch
|
||||
pve/0011-apparmor-generate-ro-bind-remount-rule-list.patch
|
||||
pve/0012-apparmor-Prevent-writes-to-proc-acpi.patch
|
||||
pve/0002-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch
|
||||
pve/0003-PVE-Config-attach-always-use-getent.patch
|
||||
|
2
lxc
2
lxc
@ -1 +1 @@
|
||||
Subproject commit 344b8ee293f4d3730a70a6ccaa03d7e4a516ae95
|
||||
Subproject commit d8ccf906038e7ca3241e572192ffa59999adb923
|
Loading…
x
Reference in New Issue
Block a user