mirror of https://github.com/systemd/systemd.git synced 2025-09-20 05:44:42 +03:00

Merge pull request #1251 from poettering/cgroups-cleanup

cgroups cleanup + other fixes
Committed by Daniel Mack on 2015-09-13 20:44:30 +02:00
21 changed files with 284 additions and 1471 deletions

View File

@@ -865,8 +865,6 @@ libbasic_la_SOURCES = \
 	src/basic/xml.h \
 	src/basic/json.c \
 	src/basic/json.h \
-	src/basic/ring.c \
-	src/basic/ring.h \
 	src/basic/barrier.c \
 	src/basic/barrier.h \
 	src/basic/async.c \
@@ -954,8 +952,6 @@ libshared_la_SOURCES = \
 	src/shared/ima-util.h \
 	src/shared/ptyfwd.c \
 	src/shared/ptyfwd.h \
-	src/shared/pty.c \
-	src/shared/pty.h \
 	src/shared/base-filesystem.c \
 	src/shared/base-filesystem.h \
 	src/shared/uid-range.c \
@@ -1402,9 +1398,7 @@ tests += \
 	test-process-util \
 	test-terminal-util \
 	test-path-lookup \
-	test-ring \
 	test-barrier \
-	test-pty \
 	test-tmpfiles \
 	test-namespace \
 	test-date \
@@ -1721,24 +1715,12 @@ test_socket_util_SOURCES = \
 test_socket_util_LDADD = \
 	libshared.la
-test_ring_SOURCES = \
-	src/test/test-ring.c
-test_ring_LDADD = \
-	libshared.la
 test_barrier_SOURCES = \
 	src/test/test-barrier.c
 test_barrier_LDADD = \
 	libshared.la
-test_pty_SOURCES = \
-	src/test/test-pty.c
-test_pty_LDADD = \
-	libcore.la
 test_tmpfiles_SOURCES = \
 	src/test/test-tmpfiles.c

View File

@@ -118,10 +118,11 @@
         <listitem>
           <para>Assign the specified CPU time share weight to the
-          processes executed. Those options take an integer value and
-          control the <literal>cpu.shares</literal> control group
-          attribute, which defaults to 1024. For details about this
-          control group attribute, see <ulink
+          processes executed. These options take an integer value and
+          control the <literal>cpu.shares</literal> control group
+          attribute. The allowed range is 2 to 262144. Defaults to
+          1024. For details about this control group attribute, see
+          <ulink
           url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
           The available CPU time is split up among all units within
           one slice relative to their CPU time share weight.</para>
@@ -258,7 +259,7 @@
           the executed processes. Takes a single weight value (between
           10 and 1000) to set the default block IO weight. This controls
           the <literal>blkio.weight</literal> control group attribute,
-          which defaults to 1000. For details about this control group
+          which defaults to 500. For details about this control group
           attribute, see <ulink
           url="https://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt">blkio-controller.txt</ulink>.
           The available IO bandwidth is split up among all units within
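
For context only: the directives documented above are set in unit files or drop-ins. A hypothetical drop-in using values inside the documented ranges (illustrative, not part of this change):

        # /etc/systemd/system/example.service.d/50-io-cpu.conf (hypothetical path)
        [Service]
        CPUShares=2048
        BlockIOWeight=500

CPUShares=2048 gives the unit twice the default weight of 1024 relative to its siblings in the same slice; BlockIOWeight=500 restates the kernel default that the corrected text now documents.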

View File

@@ -2207,6 +2207,46 @@ bool cg_is_legacy_wanted(void) {
return !cg_is_unified_wanted(); return !cg_is_unified_wanted();
} }
int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
uint64_t u;
int r;
if (isempty(s)) {
*ret = CGROUP_CPU_SHARES_INVALID;
return 0;
}
r = safe_atou64(s, &u);
if (r < 0)
return r;
if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
return -ERANGE;
*ret = u;
return 0;
}
int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
uint64_t u;
int r;
if (isempty(s)) {
*ret = CGROUP_BLKIO_WEIGHT_INVALID;
return 0;
}
r = safe_atou64(s, &u);
if (r < 0)
return r;
if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
return -ERANGE;
*ret = u;
return 0;
}
static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_CPU] = "cpu", [CGROUP_CONTROLLER_CPU] = "cpu",
[CGROUP_CONTROLLER_CPUACCT] = "cpuacct", [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
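
A minimal sketch of how the two new parsers behave, assuming it is compiled inside the systemd tree so that cgroup-util.h and its helpers are available:

        #include <assert.h>
        #include <errno.h>
        #include <inttypes.h>
        #include "cgroup-util.h"

        static void parser_demo(void) {
                uint64_t v;

                /* An empty string maps to the "unset" sentinel, not to an error. */
                assert(cg_cpu_shares_parse("", &v) == 0 && v == CGROUP_CPU_SHARES_INVALID);

                /* In-range values are passed through unchanged. */
                assert(cg_cpu_shares_parse("1024", &v) == 0 && v == 1024);
                assert(cg_blkio_weight_parse("500", &v) == 0 && v == 500);

                /* Out-of-range values are rejected with -ERANGE. */
                assert(cg_cpu_shares_parse("1", &v) == -ERANGE);
                assert(cg_blkio_weight_parse("5000", &v) == -ERANGE);
        }

Both the config-file parsing (load-fragment.c) and the command-line parsing (bus-util.c) changed later in this pull request route through these helpers, so the accepted ranges live in one place.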

View File

@@ -53,6 +53,30 @@ typedef enum CGroupMask {
_CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
} CGroupMask; } CGroupMask;
/* Special values for the cpu.shares attribute */
#define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1)
#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
#define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)
static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
return
x == CGROUP_CPU_SHARES_INVALID ||
(x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX);
}
/* Special values for the blkio.weight attribute */
#define CGROUP_BLKIO_WEIGHT_INVALID ((uint64_t) -1)
#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)
static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
return
x == CGROUP_BLKIO_WEIGHT_INVALID ||
(x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
}
/* /*
* General rules: * General rules:
* *
@@ -161,3 +185,6 @@ bool cg_is_legacy_wanted(void);
const char* cgroup_controller_to_string(CGroupController c) _const_; const char* cgroup_controller_to_string(CGroupController c) _const_;
CGroupController cgroup_controller_from_string(const char *s) _pure_; CGroupController cgroup_controller_from_string(const char *s) _pure_;
int cg_cpu_shares_parse(const char *s, uint64_t *ret);
int cg_blkio_weight_parse(const char *s, uint64_t *ret);

View File

@@ -1,209 +0,0 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include "macro.h"
#include "ring.h"
#define RING_MASK(_r, _v) ((_v) & ((_r)->size - 1))
void ring_flush(Ring *r) {
assert(r);
r->start = 0;
r->used = 0;
}
void ring_clear(Ring *r) {
assert(r);
free(r->buf);
zero(*r);
}
/*
* Get data pointers for current ring-buffer data. @vec must be an array of 2
* iovec objects. They are filled according to the data available in the
* ring-buffer. 0, 1 or 2 is returned according to the number of iovec objects
* that were filled (0 meaning buffer is empty).
*
* Hint: "struct iovec" is defined in <sys/uio.h> and looks like this:
* struct iovec {
* void *iov_base;
* size_t iov_len;
* };
*/
size_t ring_peek(Ring *r, struct iovec *vec) {
assert(r);
if (r->used == 0) {
return 0;
} else if (r->start + r->used <= r->size) {
if (vec) {
vec[0].iov_base = &r->buf[r->start];
vec[0].iov_len = r->used;
}
return 1;
} else {
if (vec) {
vec[0].iov_base = &r->buf[r->start];
vec[0].iov_len = r->size - r->start;
vec[1].iov_base = r->buf;
vec[1].iov_len = r->used - (r->size - r->start);
}
return 2;
}
}
/*
* Copy data from the ring buffer into the linear external buffer @buf. Copy
* at most @size bytes. If the ring buffer size is smaller, copy less bytes and
* return the number of bytes copied.
*/
size_t ring_copy(Ring *r, void *buf, size_t size) {
size_t l;
assert(r);
assert(buf);
if (size > r->used)
size = r->used;
if (size > 0) {
l = r->size - r->start;
if (size <= l) {
memcpy(buf, &r->buf[r->start], size);
} else {
memcpy(buf, &r->buf[r->start], l);
memcpy((uint8_t*)buf + l, r->buf, size - l);
}
}
return size;
}
/*
* Resize ring-buffer to size @nsize. @nsize must be a power-of-2, otherwise
* ring operations will behave incorrectly.
*/
static int ring_resize(Ring *r, size_t nsize) {
uint8_t *buf;
size_t l;
assert(r);
assert(nsize > 0);
buf = malloc(nsize);
if (!buf)
return -ENOMEM;
if (r->used > 0) {
l = r->size - r->start;
if (r->used <= l) {
memcpy(buf, &r->buf[r->start], r->used);
} else {
memcpy(buf, &r->buf[r->start], l);
memcpy(&buf[l], r->buf, r->used - l);
}
}
free(r->buf);
r->buf = buf;
r->size = nsize;
r->start = 0;
return 0;
}
/*
* Resize ring-buffer to provide enough room for @add bytes of new data. This
* resizes the buffer if it is too small. It returns -ENOMEM on OOM and 0 on
* success.
*/
static int ring_grow(Ring *r, size_t add) {
size_t need;
assert(r);
if (r->size - r->used >= add)
return 0;
need = r->used + add;
if (need <= r->used)
return -ENOMEM;
else if (need < 4096)
need = 4096;
need = ALIGN_POWER2(need);
if (need == 0)
return -ENOMEM;
return ring_resize(r, need);
}
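
Why the power-of-two requirement matters: RING_MASK() wraps positions with a bitwise AND of size - 1, which is only equivalent to a modulo when the size is a power of two, and ring_grow() guarantees that by rounding the requested size up with ALIGN_POWER2(). A tiny self-contained illustration:

        #include <assert.h>
        #include <stddef.h>

        int main(void) {
                size_t size = 8;                        /* power of two */
                assert((9 & (size - 1)) == 9 % size);   /* masking == modulo (both 1) */

                size = 6;                               /* not a power of two */
                assert((9 & (size - 1)) != 9 % size);   /* masking (1) != modulo (3) */
                return 0;
        }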
/*
* Push @len bytes from @u8 into the ring buffer. The buffer is resized if it
* is too small. -ENOMEM is returned on OOM, 0 on success.
*/
int ring_push(Ring *r, const void *u8, size_t size) {
int err;
size_t pos, l;
assert(r);
assert(u8);
if (size == 0)
return 0;
err = ring_grow(r, size);
if (err < 0)
return err;
pos = RING_MASK(r, r->start + r->used);
l = r->size - pos;
if (l >= size) {
memcpy(&r->buf[pos], u8, size);
} else {
memcpy(&r->buf[pos], u8, l);
memcpy(r->buf, (const uint8_t*)u8 + l, size - l);
}
r->used += size;
return 0;
}
/*
* Remove @len bytes from the start of the ring-buffer. Note that we protect
* against overflows so removing more bytes than available is safe.
*/
void ring_pull(Ring *r, size_t size) {
assert(r);
if (size > r->used)
size = r->used;
r->start = RING_MASK(r, r->start + size);
r->used -= size;
}

View File

@@ -1,55 +0,0 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#pragma once
/***
This file is part of systemd.
Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
typedef struct Ring Ring;
struct Ring {
uint8_t *buf; /* buffer or NULL */
size_t size; /* actual size of @buf */
size_t start; /* start position of ring */
size_t used; /* number of actually used bytes */
};
/* flush buffer so it is empty again */
void ring_flush(Ring *r);
/* flush buffer, free allocated data and reset to initial state */
void ring_clear(Ring *r);
/* get pointers to buffer data and their length */
size_t ring_peek(Ring *r, struct iovec *vec);
/* copy data into external linear buffer */
size_t ring_copy(Ring *r, void *buf, size_t size);
/* push data to the end of the buffer */
int ring_push(Ring *r, const void *u8, size_t size);
/* pull data from the front of the buffer */
void ring_pull(Ring *r, size_t size);
/* return size of occupied buffer in bytes */
static inline size_t ring_get_size(Ring *r) {
return r->used;
}
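
Purely to document what is being dropped here: a hypothetical caller of the old Ring API would have looked roughly like the sketch below (written against the deleted ring.h, so it no longer builds after this commit):

        #include <sys/uio.h>
        #include "ring.h"

        static int flush_ring_to_fd(int fd) {
                Ring r = { .buf = NULL };       /* zero state == empty ring */
                struct iovec vec[2];
                size_t n;
                ssize_t k;
                int res;

                /* Queue bytes; the backing buffer grows on demand. */
                res = ring_push(&r, "hello ", 6);
                if (res < 0)
                        return res;
                res = ring_push(&r, "world", 5);
                if (res < 0)
                        return res;

                /* ring_peek() fills 0, 1 or 2 iovecs (2 when the data wraps),
                 * which feed straight into writev(). */
                n = ring_peek(&r, vec);
                k = writev(fd, vec, (int) n);
                if (k > 0)
                        ring_pull(&r, (size_t) k);  /* drop only what was written */

                ring_clear(&r);                     /* free the backing buffer */
                return 0;
        }

Its user in this tree, the also-removed src/shared/pty.c further down (see its #include "ring.h"), goes away in the same cleanup.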

View File

@@ -37,14 +37,16 @@ void cgroup_context_init(CGroupContext *c) {
/* Initialize everything to the kernel defaults, assuming the /* Initialize everything to the kernel defaults, assuming the
* structure is preinitialized to 0 */ * structure is preinitialized to 0 */
c->cpu_shares = (unsigned long) -1; c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->startup_cpu_shares = (unsigned long) -1; c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->memory_limit = (uint64_t) -1;
c->blockio_weight = (unsigned long) -1;
c->startup_blockio_weight = (unsigned long) -1;
c->tasks_max = (uint64_t) -1;
c->cpu_quota_per_sec_usec = USEC_INFINITY; c->cpu_quota_per_sec_usec = USEC_INFINITY;
c->memory_limit = (uint64_t) -1;
c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
c->tasks_max = (uint64_t) -1;
} }
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
@@ -102,11 +104,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sCPUAccounting=%s\n" "%sCPUAccounting=%s\n"
"%sBlockIOAccounting=%s\n" "%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n" "%sMemoryAccounting=%s\n"
"%sCPUShares=%lu\n" "%sTasksAccounting=%s\n"
"%sStartupCPUShares=%lu\n" "%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n" "%sCPUQuotaPerSecSec=%s\n"
"%sBlockIOWeight=%lu\n" "%sBlockIOWeight=%" PRIu64 "\n"
"%sStartupBlockIOWeight=%lu\n" "%sStartupBlockIOWeight=%" PRIu64 "\n"
"%sMemoryLimit=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n"
"%sTasksMax=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n"
"%sDevicePolicy=%s\n" "%sDevicePolicy=%s\n"
@@ -114,6 +117,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->cpu_accounting), prefix, yes_no(c->cpu_accounting),
prefix, yes_no(c->blockio_accounting), prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting), prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
prefix, c->cpu_shares, prefix, c->cpu_shares,
prefix, c->startup_cpu_shares, prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -133,7 +137,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
LIST_FOREACH(device_weights, w, c->blockio_device_weights) LIST_FOREACH(device_weights, w, c->blockio_device_weights)
fprintf(f, fprintf(f,
"%sBlockIODeviceWeight=%s %lu", "%sBlockIODeviceWeight=%s %" PRIu64,
prefix, prefix,
w->path, w->path,
w->weight); w->weight);
@@ -309,11 +313,11 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
* and missing cgroups, i.e. EROFS and ENOENT. */ * and missing cgroups, i.e. EROFS and ENOENT. */
if ((mask & CGROUP_MASK_CPU) && !is_root) { if ((mask & CGROUP_MASK_CPU) && !is_root) {
char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1]; char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
sprintf(buf, "%lu\n", sprintf(buf, "%" PRIu64 "\n",
IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares : IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024); c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
r = cg_set_attribute("cpu", path, "cpu.shares", buf); r = cg_set_attribute("cpu", path, "cpu.shares", buf);
if (r < 0) if (r < 0)
log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
@@ -336,15 +340,15 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
} }
if (mask & CGROUP_MASK_BLKIO) { if (mask & CGROUP_MASK_BLKIO) {
char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1, char buf[MAX(DECIMAL_STR_MAX(uint64_t)+1,
DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
CGroupBlockIODeviceWeight *w; CGroupBlockIODeviceWeight *w;
CGroupBlockIODeviceBandwidth *b; CGroupBlockIODeviceBandwidth *b;
if (!is_root) { if (!is_root) {
sprintf(buf, "%lu\n", IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight : sprintf(buf, "%" PRIu64 "\n",
c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000); IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->startup_blockio_weight :
c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->blockio_weight : CGROUP_BLKIO_WEIGHT_DEFAULT);
r = cg_set_attribute("blkio", path, "blkio.weight", buf); r = cg_set_attribute("blkio", path, "blkio.weight", buf);
if (r < 0) if (r < 0)
log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
@@ -358,7 +362,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
if (r < 0) if (r < 0)
continue; continue;
sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight); sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight);
r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
if (r < 0) if (r < 0)
log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
@@ -493,14 +497,14 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need */ /* Figure out which controllers we need */
if (c->cpu_accounting || if (c->cpu_accounting ||
c->cpu_shares != (unsigned long) -1 || c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
c->startup_cpu_shares != (unsigned long) -1 || c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
c->cpu_quota_per_sec_usec != USEC_INFINITY) c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
if (c->blockio_accounting || if (c->blockio_accounting ||
c->blockio_weight != (unsigned long) -1 || c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
c->startup_blockio_weight != (unsigned long) -1 || c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
c->blockio_device_weights || c->blockio_device_weights ||
c->blockio_device_bandwidths) c->blockio_device_bandwidths)
mask |= CGROUP_MASK_BLKIO; mask |= CGROUP_MASK_BLKIO;
@@ -1577,6 +1581,32 @@ bool unit_cgroup_delegate(Unit *u) {
return c->delegate; return c->delegate;
} }
void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
assert(u);
if (!UNIT_HAS_CGROUP_CONTEXT(u))
return;
if (m == 0)
return;
if ((u->cgroup_realized_mask & m) == 0)
return;
u->cgroup_realized_mask &= ~m;
unit_add_to_cgroup_queue(u);
}
void manager_invalidate_startup_units(Manager *m) {
Iterator i;
Unit *u;
assert(m);
SET_FOREACH(u, m->startup_units, i)
unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_BLKIO);
}
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
[CGROUP_AUTO] = "auto", [CGROUP_AUTO] = "auto",
[CGROUP_CLOSED] = "closed", [CGROUP_CLOSED] = "closed",
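
The two helpers added above centralize cgroup re-realization: instead of clearing bits in u->cgroup_realized_mask by hand, callers invalidate the affected controllers and let the cgroup queue rewrite the attributes. A rough, hypothetical illustration of the calling pattern (the real call sites follow in dbus-cgroup.c later in this diff):

        /* Hypothetical property setter, for illustration only. */
        static void example_set_cpu_shares(Unit *u, CGroupContext *c, uint64_t shares) {
                c->cpu_shares = shares;

                /* Mark the CPU controller as no longer realized and queue the
                 * unit, so cgroup.c re-applies cpu.shares on the next pass. */
                unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
        }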

View File

@@ -58,7 +58,7 @@ struct CGroupDeviceAllow {
struct CGroupBlockIODeviceWeight { struct CGroupBlockIODeviceWeight {
LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
char *path; char *path;
unsigned long weight; uint64_t weight;
}; };
struct CGroupBlockIODeviceBandwidth { struct CGroupBlockIODeviceBandwidth {
@@ -74,12 +74,12 @@ struct CGroupContext {
bool memory_accounting; bool memory_accounting;
bool tasks_accounting; bool tasks_accounting;
unsigned long cpu_shares; uint64_t cpu_shares;
unsigned long startup_cpu_shares; uint64_t startup_cpu_shares;
usec_t cpu_quota_per_sec_usec; usec_t cpu_quota_per_sec_usec;
unsigned long blockio_weight; uint64_t blockio_weight;
unsigned long startup_blockio_weight; uint64_t startup_blockio_weight;
LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights); LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths); LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths);
@@ -88,9 +88,9 @@ struct CGroupContext {
CGroupDevicePolicy device_policy; CGroupDevicePolicy device_policy;
LIST_HEAD(CGroupDeviceAllow, device_allow); LIST_HEAD(CGroupDeviceAllow, device_allow);
bool delegate;
uint64_t tasks_max; uint64_t tasks_max;
bool delegate;
}; };
#include "unit.h" #include "unit.h"
@@ -149,5 +149,9 @@ bool unit_cgroup_delegate(Unit *u);
int unit_notify_cgroup_empty(Unit *u); int unit_notify_cgroup_empty(Unit *u);
int manager_notify_cgroup_empty(Manager *m, const char *group); int manager_notify_cgroup_empty(Manager *m, const char *group);
void unit_invalidate_cgroup(Unit *u, CGroupMask m);
void manager_invalidate_startup_units(Manager *m);
const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_; const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_; CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;

View File

@@ -133,34 +133,16 @@ static int property_get_device_allow(
return sd_bus_message_close_container(reply); return sd_bus_message_close_container(reply);
} }
static int property_get_ulong_as_u64(
sd_bus *bus,
const char *path,
const char *interface,
const char *property,
sd_bus_message *reply,
void *userdata,
sd_bus_error *error) {
unsigned long *ul = userdata;
assert(bus);
assert(reply);
assert(ul);
return sd_bus_message_append(reply, "t", *ul == (unsigned long) -1 ? (uint64_t) -1 : (uint64_t) *ul);
}
const sd_bus_vtable bus_cgroup_vtable[] = { const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0), SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0), SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0), SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
SD_BUS_PROPERTY("CPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
SD_BUS_PROPERTY("BlockIOWeight", "t", property_get_ulong_as_u64, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0),
SD_BUS_PROPERTY("StartupBlockIOWeight", "t", property_get_ulong_as_u64, offsetof(CGroupContext, startup_blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0),
SD_BUS_PROPERTY("BlockIODeviceWeight", "a(st)", property_get_blockio_device_weight, 0, 0), SD_BUS_PROPERTY("BlockIODeviceWeight", "a(st)", property_get_blockio_device_weight, 0, 0),
SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
@@ -230,56 +212,52 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->cpu_accounting = b; c->cpu_accounting = b;
u->cgroup_realized_mask &= ~CGROUP_MASK_CPUACCT; unit_invalidate_cgroup(u, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU);
unit_write_drop_in_private(u, mode, name, b ? "CPUAccounting=yes" : "CPUAccounting=no"); unit_write_drop_in_private(u, mode, name, b ? "CPUAccounting=yes" : "CPUAccounting=no");
} }
return 1; return 1;
} else if (streq(name, "CPUShares")) { } else if (streq(name, "CPUShares")) {
uint64_t u64; uint64_t shares;
unsigned long ul;
r = sd_bus_message_read(message, "t", &u64); r = sd_bus_message_read(message, "t", &shares);
if (r < 0) if (r < 0)
return r; return r;
if (u64 == (uint64_t) -1) if (!CGROUP_CPU_SHARES_IS_OK(shares))
ul = (unsigned long) -1;
else {
ul = (unsigned long) u64;
if (ul <= 0 || (uint64_t) ul != u64)
return sd_bus_error_set_errnof(error, EINVAL, "CPUShares value out of range"); return sd_bus_error_set_errnof(error, EINVAL, "CPUShares value out of range");
}
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->cpu_shares = ul; c->cpu_shares = shares;
u->cgroup_realized_mask &= ~CGROUP_MASK_CPU; unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
unit_write_drop_in_private_format(u, mode, name, "CPUShares=%lu", ul);
if (shares == CGROUP_CPU_SHARES_INVALID)
unit_write_drop_in_private(u, mode, name, "CPUShares=");
else
unit_write_drop_in_private_format(u, mode, name, "CPUShares=%" PRIu64, shares);
} }
return 1; return 1;
} else if (streq(name, "StartupCPUShares")) { } else if (streq(name, "StartupCPUShares")) {
uint64_t u64; uint64_t shares;
unsigned long ul;
r = sd_bus_message_read(message, "t", &u64); r = sd_bus_message_read(message, "t", &shares);
if (r < 0) if (r < 0)
return r; return r;
if (u64 == (uint64_t) -1) if (!CGROUP_CPU_SHARES_IS_OK(shares))
ul = (unsigned long) -1;
else {
ul = (unsigned long) u64;
if (ul <= 0 || (uint64_t) ul != u64)
return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUShares value out of range"); return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUShares value out of range");
}
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->startup_cpu_shares = ul; c->startup_cpu_shares = shares;
u->cgroup_realized_mask &= ~CGROUP_MASK_CPU; unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
unit_write_drop_in_private_format(u, mode, name, "StartupCPUShares=%lu", ul);
if (shares == CGROUP_CPU_SHARES_INVALID)
unit_write_drop_in_private(u, mode, name, "StartupCPUShares=");
else
unit_write_drop_in_private_format(u, mode, name, "StartupCPUShares=%" PRIu64, shares);
} }
return 1; return 1;
@@ -296,7 +274,7 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->cpu_quota_per_sec_usec = u64; c->cpu_quota_per_sec_usec = u64;
u->cgroup_realized_mask &= ~CGROUP_MASK_CPU; unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%", (double) (c->cpu_quota_per_sec_usec / 10000)); unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%", (double) (c->cpu_quota_per_sec_usec / 10000));
} }
@@ -311,56 +289,52 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->blockio_accounting = b; c->blockio_accounting = b;
u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO; unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
unit_write_drop_in_private(u, mode, name, b ? "BlockIOAccounting=yes" : "BlockIOAccounting=no"); unit_write_drop_in_private(u, mode, name, b ? "BlockIOAccounting=yes" : "BlockIOAccounting=no");
} }
return 1; return 1;
} else if (streq(name, "BlockIOWeight")) { } else if (streq(name, "BlockIOWeight")) {
uint64_t u64; uint64_t weight;
unsigned long ul;
r = sd_bus_message_read(message, "t", &u64); r = sd_bus_message_read(message, "t", &weight);
if (r < 0) if (r < 0)
return r; return r;
if (u64 == (uint64_t) -1) if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight))
ul = (unsigned long) -1;
else {
ul = (unsigned long) u64;
if (ul < 10 || ul > 1000)
return sd_bus_error_set_errnof(error, EINVAL, "BlockIOWeight value out of range"); return sd_bus_error_set_errnof(error, EINVAL, "BlockIOWeight value out of range");
}
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->blockio_weight = ul; c->blockio_weight = weight;
u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO; unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
unit_write_drop_in_private_format(u, mode, name, "BlockIOWeight=%lu", ul);
if (weight == CGROUP_BLKIO_WEIGHT_INVALID)
unit_write_drop_in_private(u, mode, name, "BlockIOWeight=");
else
unit_write_drop_in_private_format(u, mode, name, "BlockIOWeight=%" PRIu64, weight);
} }
return 1; return 1;
} else if (streq(name, "StartupBlockIOWeight")) { } else if (streq(name, "StartupBlockIOWeight")) {
uint64_t u64; uint64_t weight;
unsigned long ul;
r = sd_bus_message_read(message, "t", &u64); r = sd_bus_message_read(message, "t", &weight);
if (r < 0) if (r < 0)
return r; return r;
if (u64 == (uint64_t) -1) if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight))
ul = (unsigned long) -1;
else {
ul = (unsigned long) u64;
if (ul < 10 || ul > 1000)
return sd_bus_error_set_errnof(error, EINVAL, "StartupBlockIOWeight value out of range"); return sd_bus_error_set_errnof(error, EINVAL, "StartupBlockIOWeight value out of range");
}
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->startup_blockio_weight = ul; c->startup_blockio_weight = weight;
u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO; unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
unit_write_drop_in_private_format(u, mode, name, "StartupBlockIOWeight=%lu", ul);
if (weight == CGROUP_BLKIO_WEIGHT_INVALID)
unit_write_drop_in_private(u, mode, name, "StartupBlockIOWeight=");
else
unit_write_drop_in_private_format(u, mode, name, "StartupBlockIOWeight=%" PRIu64, weight);
} }
return 1; return 1;
@@ -429,7 +403,7 @@ int bus_cgroup_set_property(
cgroup_context_free_blockio_device_bandwidth(c, a); cgroup_context_free_blockio_device_bandwidth(c, a);
} }
u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO; unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
f = open_memstream(&buf, &size); f = open_memstream(&buf, &size);
if (!f) if (!f)
@@ -455,17 +429,16 @@ int bus_cgroup_set_property(
} else if (streq(name, "BlockIODeviceWeight")) { } else if (streq(name, "BlockIODeviceWeight")) {
const char *path; const char *path;
uint64_t u64; uint64_t weight;
unsigned n = 0; unsigned n = 0;
r = sd_bus_message_enter_container(message, 'a', "(st)"); r = sd_bus_message_enter_container(message, 'a', "(st)");
if (r < 0) if (r < 0)
return r; return r;
while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) { while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) {
unsigned long ul = u64;
if (ul < 10 || ul > 1000) if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight) || weight == CGROUP_BLKIO_WEIGHT_INVALID)
return sd_bus_error_set_errnof(error, EINVAL, "BlockIODeviceWeight out of range"); return sd_bus_error_set_errnof(error, EINVAL, "BlockIODeviceWeight out of range");
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
@@ -491,7 +464,7 @@ int bus_cgroup_set_property(
LIST_PREPEND(device_weights,c->blockio_device_weights, a); LIST_PREPEND(device_weights,c->blockio_device_weights, a);
} }
a->weight = ul; a->weight = weight;
} }
n++; n++;
@@ -512,7 +485,7 @@ int bus_cgroup_set_property(
cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights); cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
} }
u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO; unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
f = open_memstream(&buf, &size); f = open_memstream(&buf, &size);
if (!f) if (!f)
@@ -520,7 +493,7 @@ int bus_cgroup_set_property(
fputs("BlockIODeviceWeight=\n", f); fputs("BlockIODeviceWeight=\n", f);
LIST_FOREACH(device_weights, a, c->blockio_device_weights) LIST_FOREACH(device_weights, a, c->blockio_device_weights)
fprintf(f, "BlockIODeviceWeight=%s %lu\n", a->path, a->weight); fprintf(f, "BlockIODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight);
fflush(f); fflush(f);
unit_write_drop_in_private(u, mode, name, buf); unit_write_drop_in_private(u, mode, name, buf);
@@ -537,7 +510,7 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->memory_accounting = b; c->memory_accounting = b;
u->cgroup_realized_mask &= ~CGROUP_MASK_MEMORY; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
unit_write_drop_in_private(u, mode, name, b ? "MemoryAccounting=yes" : "MemoryAccounting=no"); unit_write_drop_in_private(u, mode, name, b ? "MemoryAccounting=yes" : "MemoryAccounting=no");
} }
@@ -552,7 +525,7 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->memory_limit = limit; c->memory_limit = limit;
u->cgroup_realized_mask &= ~CGROUP_MASK_MEMORY; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
if (limit == (uint64_t) -1) if (limit == (uint64_t) -1)
unit_write_drop_in_private(u, mode, name, "MemoryLimit=infinity"); unit_write_drop_in_private(u, mode, name, "MemoryLimit=infinity");
@@ -578,7 +551,7 @@ int bus_cgroup_set_property(
char *buf; char *buf;
c->device_policy = p; c->device_policy = p;
u->cgroup_realized_mask &= ~CGROUP_MASK_DEVICES; unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
buf = strjoina("DevicePolicy=", policy); buf = strjoina("DevicePolicy=", policy);
unit_write_drop_in_private(u, mode, name, buf); unit_write_drop_in_private(u, mode, name, buf);
@@ -657,7 +630,7 @@ int bus_cgroup_set_property(
cgroup_context_free_device_allow(c, c->device_allow); cgroup_context_free_device_allow(c, c->device_allow);
} }
u->cgroup_realized_mask &= ~CGROUP_MASK_DEVICES; unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
f = open_memstream(&buf, &size); f = open_memstream(&buf, &size);
if (!f) if (!f)
@@ -682,7 +655,7 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->tasks_accounting = b; c->tasks_accounting = b;
u->cgroup_realized_mask &= ~CGROUP_MASK_PIDS; unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
unit_write_drop_in_private(u, mode, name, b ? "TasksAccounting=yes" : "TasksAccounting=no"); unit_write_drop_in_private(u, mode, name, b ? "TasksAccounting=yes" : "TasksAccounting=no");
} }
@@ -697,7 +670,7 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) { if (mode != UNIT_CHECK) {
c->tasks_max = limit; c->tasks_max = limit;
u->cgroup_realized_mask &= ~CGROUP_MASK_PIDS; unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
if (limit == (uint64_t) -1) if (limit == (uint64_t) -1)
unit_write_drop_in_private(u, mode, name, "TasksMax=infinity"); unit_write_drop_in_private(u, mode, name, "TasksMax=infinity");

View File

@@ -1160,8 +1160,8 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
assert(idle_pipe); assert(idle_pipe);
safe_close(idle_pipe[1]); idle_pipe[1] = safe_close(idle_pipe[1]);
safe_close(idle_pipe[2]); idle_pipe[2] = safe_close(idle_pipe[2]);
if (idle_pipe[0] >= 0) { if (idle_pipe[0] >= 0) {
int r; int r;
@@ -1169,18 +1169,20 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC); r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
if (idle_pipe[3] >= 0 && r == 0 /* timeout */) { if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
ssize_t n;
/* Signal systemd that we are bored and want to continue. */ /* Signal systemd that we are bored and want to continue. */
r = write(idle_pipe[3], "x", 1); n = write(idle_pipe[3], "x", 1);
if (r > 0) if (n > 0)
/* Wait for systemd to react to the signal above. */ /* Wait for systemd to react to the signal above. */
fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC); fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
} }
safe_close(idle_pipe[0]); idle_pipe[0] = safe_close(idle_pipe[0]);
} }
safe_close(idle_pipe[3]); idle_pipe[3] = safe_close(idle_pipe[3]);
} }
static int build_environment( static int build_environment(
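
Two things change above: the write() result now lands in an ssize_t instead of being squeezed into the int r, and every close goes through the fd = safe_close(fd) idiom so the array slot is reset to -1 and can never be closed twice or reused stale. A generic sketch of that idiom, using a hypothetical helper that mirrors safe_close()'s always-return -1 contract:

        #include <unistd.h>

        /* Hypothetical helper: close fd if valid, always return -1 so the
         * caller can overwrite its variable in the same statement. */
        static inline int my_safe_close(int fd) {
                if (fd >= 0)
                        (void) close(fd);
                return -1;
        }

        /* Usage: after this line, pipe_fd is -1, so a later close() or read()
         * on the old descriptor number cannot happen by accident. */
        /*   pipe_fd = my_safe_close(pipe_fd);   */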

View File

@@ -2605,26 +2605,19 @@ int config_parse_cpu_shares(
void *data, void *data,
void *userdata) { void *userdata) {
unsigned long *shares = data, lu; uint64_t *shares = data;
int r; int r;
assert(filename); assert(filename);
assert(lvalue); assert(lvalue);
assert(rvalue); assert(rvalue);
if (isempty(rvalue)) { r = cg_cpu_shares_parse(rvalue, shares);
*shares = (unsigned long) -1; if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "CPU shares '%s' invalid. Ignoring.", rvalue);
return 0; return 0;
} }
r = safe_atolu(rvalue, &lu);
if (r < 0 || lu <= 0) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL,
"CPU shares '%s' invalid. Ignoring.", rvalue);
return 0;
}
*shares = lu;
return 0; return 0;
} }
@@ -2805,26 +2798,19 @@ int config_parse_blockio_weight(
void *data, void *data,
void *userdata) { void *userdata) {
unsigned long *weight = data, lu; uint64_t *weight = data;
int r; int r;
assert(filename); assert(filename);
assert(lvalue); assert(lvalue);
assert(rvalue); assert(rvalue);
if (isempty(rvalue)) { r = cg_blkio_weight_parse(rvalue, weight);
*weight = (unsigned long) -1; if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Block IO weight '%s' invalid. Ignoring.", rvalue);
return 0; return 0;
} }
r = safe_atolu(rvalue, &lu);
if (r < 0 || lu < 10 || lu > 1000) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL,
"Block IO weight '%s' invalid. Ignoring.", rvalue);
return 0;
}
*weight = lu;
return 0; return 0;
} }
@@ -2843,8 +2829,8 @@ int config_parse_blockio_device_weight(
_cleanup_free_ char *path = NULL; _cleanup_free_ char *path = NULL;
CGroupBlockIODeviceWeight *w; CGroupBlockIODeviceWeight *w;
CGroupContext *c = data; CGroupContext *c = data;
unsigned long lu;
const char *weight; const char *weight;
uint64_t u;
size_t n; size_t n;
int r; int r;
@@ -2861,9 +2847,10 @@ int config_parse_blockio_device_weight(
n = strcspn(rvalue, WHITESPACE); n = strcspn(rvalue, WHITESPACE);
weight = rvalue + n; weight = rvalue + n;
if (!*weight) { weight += strspn(weight, WHITESPACE);
log_syntax(unit, LOG_ERR, filename, line, EINVAL,
"Expected block device and device weight. Ignoring."); if (isempty(weight)) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Expected block device and device weight. Ignoring.");
return 0; return 0;
} }
@@ -2872,19 +2859,18 @@ int config_parse_blockio_device_weight(
return log_oom(); return log_oom();
if (!path_startswith(path, "/dev")) { if (!path_startswith(path, "/dev")) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL, log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Invalid device node path '%s'. Ignoring.", path);
"Invalid device node path '%s'. Ignoring.", path);
return 0; return 0;
} }
weight += strspn(weight, WHITESPACE); r = cg_blkio_weight_parse(weight, &u);
r = safe_atolu(weight, &lu); if (r < 0) {
if (r < 0 || lu < 10 || lu > 1000) { log_syntax(unit, LOG_ERR, filename, line, r, "Block IO weight '%s' invalid. Ignoring.", weight);
log_syntax(unit, LOG_ERR, filename, line, EINVAL,
"Block IO weight '%s' invalid. Ignoring.", rvalue);
return 0; return 0;
} }
assert(u != CGROUP_BLKIO_WEIGHT_INVALID);
w = new0(CGroupBlockIODeviceWeight, 1); w = new0(CGroupBlockIODeviceWeight, 1);
if (!w) if (!w)
return log_oom(); return log_oom();
@@ -2892,7 +2878,7 @@ int config_parse_blockio_device_weight(
w->path = path; w->path = path;
path = NULL; path = NULL;
w->weight = lu; w->weight = u;
LIST_PREPEND(device_weights, c->blockio_device_weights, w); LIST_PREPEND(device_weights, c->blockio_device_weights, w);
return 0; return 0;
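
For reference, the assignment format this parser accepts is a device node below /dev followed by a weight that cg_blkio_weight_parse() validates (10 to 1000), for example (illustrative values):

        BlockIODeviceWeight=/dev/sda 750

The rewrite also moves the strspn() whitespace skip before the empty-value check, so a value with only trailing whitespace after the device is now reported as a missing weight rather than an invalid one.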

View File

@@ -317,6 +317,8 @@ static int manager_watch_idle_pipe(Manager *m) {
static void manager_close_idle_pipe(Manager *m) { static void manager_close_idle_pipe(Manager *m) {
assert(m); assert(m);
m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
safe_close_pair(m->idle_pipe); safe_close_pair(m->idle_pipe);
safe_close_pair(m->idle_pipe + 2); safe_close_pair(m->idle_pipe + 2);
} }
@@ -602,14 +604,6 @@ int manager_new(ManagerRunningAs running_as, bool test_run, Manager **_m) {
if (r < 0) if (r < 0)
goto fail; goto fail;
r = set_ensure_allocated(&m->startup_units, NULL);
if (r < 0)
goto fail;
r = set_ensure_allocated(&m->failed_units, NULL);
if (r < 0)
goto fail;
r = sd_event_default(&m->event); r = sd_event_default(&m->event);
if (r < 0) if (r < 0)
goto fail; goto fail;
@@ -944,7 +938,6 @@ Manager* manager_free(Manager *m) {
sd_event_source_unref(m->notify_event_source); sd_event_source_unref(m->notify_event_source);
sd_event_source_unref(m->time_change_event_source); sd_event_source_unref(m->time_change_event_source);
sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->jobs_in_progress_event_source);
sd_event_source_unref(m->idle_pipe_event_source);
sd_event_source_unref(m->run_queue_event_source); sd_event_source_unref(m->run_queue_event_source);
safe_close(m->signal_fd); safe_close(m->signal_fd);
@@ -1962,7 +1955,6 @@ static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32
m->no_console_output = m->n_on_console > 0; m->no_console_output = m->n_on_console > 0;
m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
manager_close_idle_pipe(m); manager_close_idle_pipe(m);
return 0; return 0;
@@ -2675,9 +2667,6 @@ static void manager_notify_finished(Manager *m) {
} }
void manager_check_finished(Manager *m) { void manager_check_finished(Manager *m) {
Unit *u = NULL;
Iterator i;
assert(m); assert(m);
if (m->n_reloading > 0) if (m->n_reloading > 0)
@@ -2690,11 +2679,9 @@ void manager_check_finished(Manager *m) {
return; return;
if (hashmap_size(m->jobs) > 0) { if (hashmap_size(m->jobs) > 0) {
if (m->jobs_in_progress_event_source) if (m->jobs_in_progress_event_source)
/* Ignore any failure, this is only for feedback */ /* Ignore any failure, this is only for feedback */
(void) sd_event_source_set_time(m->jobs_in_progress_event_source, (void) sd_event_source_set_time(m->jobs_in_progress_event_source, now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
return; return;
} }
@@ -2702,7 +2689,6 @@ void manager_check_finished(Manager *m) {
manager_flip_auto_status(m, false); manager_flip_auto_status(m, false);
/* Notify Type=idle units that we are done now */ /* Notify Type=idle units that we are done now */
m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
manager_close_idle_pipe(m); manager_close_idle_pipe(m);
/* Turn off confirm spawn now */ /* Turn off confirm spawn now */
@@ -2721,9 +2707,7 @@ void manager_check_finished(Manager *m) {
manager_notify_finished(m); manager_notify_finished(m);
SET_FOREACH(u, m->startup_units, i) manager_invalidate_startup_units(m);
if (u->cgroup_path)
cgroup_context_apply(unit_get_cgroup_context(u), unit_get_own_mask(u), u->cgroup_path, manager_state(m));
} }
static int create_generator_dir(Manager *m, char **generator, const char *name) { static int create_generator_dir(Manager *m, char **generator, const char *name) {
@@ -3069,8 +3053,9 @@ const char *manager_get_runtime_prefix(Manager *m) {
getenv("XDG_RUNTIME_DIR"); getenv("XDG_RUNTIME_DIR");
} }
void manager_update_failed_units(Manager *m, Unit *u, bool failed) { int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
unsigned size; unsigned size;
int r;
assert(m); assert(m);
assert(u->manager == m); assert(u->manager == m);
@@ -3078,13 +3063,19 @@ void manager_update_failed_units(Manager *m, Unit *u, bool failed) {
         size = set_size(m->failed_units);
 
         if (failed) {
+                r = set_ensure_allocated(&m->failed_units, NULL);
+                if (r < 0)
+                        return log_oom();
+
                 if (set_put(m->failed_units, u) < 0)
-                        log_oom();
+                        return log_oom();
         } else
-                set_remove(m->failed_units, u);
+                (void) set_remove(m->failed_units, u);
 
         if (set_size(m->failed_units) != size)
                 bus_manager_send_change_signal(m);
 
+        return 0;
 }
ManagerState manager_state(Manager *m) { ManagerState manager_state(Manager *m) {
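
manager_update_failed_units() now allocates the failed-units set lazily, on the first failure it records, and reports allocation errors to the caller instead of merely logging them; manager_new() correspondingly drops its unconditional set_ensure_allocated() calls. A small self-contained sketch of the same lazy-allocation pattern, with made-up names (not systemd code):

        #include <errno.h>
        #include <stdlib.h>

        /* Toy container standing in for the failed-units set. */
        typedef struct FailedList {
                void **items;
                size_t n, alloc;
        } FailedList;

        static int failed_list_add(FailedList **l, void *unit) {
                if (!*l) {                              /* first failure: allocate only now */
                        *l = calloc(1, sizeof(FailedList));
                        if (!*l)
                                return -ENOMEM;
                }

                if ((*l)->n >= (*l)->alloc) {           /* grow the item array on demand */
                        size_t na = (*l)->alloc > 0 ? (*l)->alloc * 2 : 4;
                        void **ni = realloc((*l)->items, na * sizeof(void*));
                        if (!ni)
                                return -ENOMEM;
                        (*l)->items = ni;
                        (*l)->alloc = na;
                }

                (*l)->items[(*l)->n++] = unit;
                return 0;
        }

Managers that never see a failed unit never pay for the set, which is the point of moving the allocation out of manager_new().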

View File

@@ -369,7 +369,7 @@ const char *manager_get_runtime_prefix(Manager *m);
ManagerState manager_state(Manager *m); ManagerState manager_state(Manager *m);
void manager_update_failed_units(Manager *m, Unit *u, bool failed); int manager_update_failed_units(Manager *m, Unit *u, bool failed);
const char *manager_state_to_string(ManagerState m) _const_; const char *manager_state_to_string(ManagerState m) _const_;
ManagerState manager_state_from_string(const char *s) _pure_; ManagerState manager_state_from_string(const char *s) _pure_;

View File

@@ -736,8 +736,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error
if (m->idle_pipe[0] < 0 && m->idle_pipe[1] < 0 && if (m->idle_pipe[0] < 0 && m->idle_pipe[1] < 0 &&
m->idle_pipe[2] < 0 && m->idle_pipe[3] < 0) { m->idle_pipe[2] < 0 && m->idle_pipe[3] < 0) {
pipe2(m->idle_pipe, O_NONBLOCK|O_CLOEXEC); (void) pipe2(m->idle_pipe, O_NONBLOCK|O_CLOEXEC);
pipe2(m->idle_pipe + 2, O_NONBLOCK|O_CLOEXEC); (void) pipe2(m->idle_pipe + 2, O_NONBLOCK|O_CLOEXEC);
} }
} }

View File

@@ -528,7 +528,7 @@ void unit_free(Unit *u) {
unit_release_cgroup(u); unit_release_cgroup(u);
manager_update_failed_units(u->manager, u, false); (void) manager_update_failed_units(u->manager, u, false);
set_remove(u->manager->startup_units, u); set_remove(u->manager->startup_units, u);
free(u->description); free(u->description);
@@ -1172,15 +1172,20 @@ static int unit_add_mount_dependencies(Unit *u) {
static int unit_add_startup_units(Unit *u) { static int unit_add_startup_units(Unit *u) {
CGroupContext *c; CGroupContext *c;
int r;
c = unit_get_cgroup_context(u); c = unit_get_cgroup_context(u);
if (!c) if (!c)
return 0; return 0;
if (c->startup_cpu_shares == (unsigned long) -1 && if (c->startup_cpu_shares == CGROUP_CPU_SHARES_INVALID &&
c->startup_blockio_weight == (unsigned long) -1) c->startup_blockio_weight == CGROUP_BLKIO_WEIGHT_INVALID)
return 0; return 0;
r = set_ensure_allocated(&u->manager->startup_units, NULL);
if (r < 0)
return r;
return set_put(u->manager->startup_units, u); return set_put(u->manager->startup_units, u);
} }
@@ -1807,7 +1812,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
} }
/* Keep track of failed units */ /* Keep track of failed units */
manager_update_failed_units(u->manager, u, ns == UNIT_FAILED); (void) manager_update_failed_units(u->manager, u, ns == UNIT_FAILED);
/* Make sure the cgroup is always removed when we become inactive */ /* Make sure the cgroup is always removed when we become inactive */
if (UNIT_IS_INACTIVE_OR_FAILED(ns)) if (UNIT_IS_INACTIVE_OR_FAILED(ns))

View File

@@ -23,22 +23,24 @@
#include "sd-daemon.h" #include "sd-daemon.h"
#include "sd-event.h" #include "sd-event.h"
#include "util.h"
#include "strv.h"
#include "macro.h"
#include "def.h"
#include "path-util.h"
#include "missing.h"
#include "set.h"
#include "signal-util.h"
#include "unit-name.h"
#include "sd-bus.h" #include "sd-bus.h"
#include "bus-error.h" #include "bus-error.h"
#include "bus-internal.h"
#include "bus-label.h" #include "bus-label.h"
#include "bus-message.h" #include "bus-message.h"
#include "cgroup-util.h"
#include "def.h"
#include "macro.h"
#include "missing.h"
#include "path-util.h"
#include "set.h"
#include "signal-util.h"
#include "strv.h"
#include "unit-name.h"
#include "util.h"
#include "bus-util.h" #include "bus-util.h"
#include "bus-internal.h"
static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
sd_event *e = userdata; sd_event *e = userdata;
@@ -1463,10 +1465,21 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "t", n); r = sd_bus_message_append(m, "v", "t", n);
} else if (STR_IN_SET(field, "CPUShares", "BlockIOWeight")) { } else if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) {
uint64_t u; uint64_t u;
r = safe_atou64(eq, &u); r = cg_cpu_shares_parse(eq, &u);
if (r < 0) {
log_error("Failed to parse %s value %s.", field, eq);
return -EINVAL;
}
r = sd_bus_message_append(m, "v", "t", u);
} else if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight")) {
uint64_t u;
r = cg_blkio_weight_parse(eq, &u);
if (r < 0) { if (r < 0) {
log_error("Failed to parse %s value %s.", field, eq); log_error("Failed to parse %s value %s.", field, eq);
return -EINVAL; return -EINVAL;
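
This is the code path behind runtime property changes made from the command line; with the new helpers the same range checks apply there as in unit files, e.g. (illustrative values):

        systemctl set-property example.service CPUShares=2048 BlockIOWeight=500

Each assignment is parsed into a uint64 and sent as a "t" variant on the bus, so out-of-range values are rejected on the client side before they ever reach PID 1.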

View File

@@ -1,633 +0,0 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
/*
* PTY
* A PTY object represents a single PTY connection between a master and a
* child. The child process is fork()ed so the caller controls what program
* will be run.
*
* Programs like /bin/login tend to perform a vhangup() on their TTY
* before running the login procedure. This also causes the pty master
* to get a EPOLLHUP event as long as no client has the TTY opened.
* This means, we cannot use the TTY connection as reliable way to track
* the client. Instead, we _must_ rely on the PID of the client to track
* them.
* However, this has the side effect that if the client forks and the
* parent exits, we loose them and restart the client. But this seems to
* be the expected behavior so we implement it here.
*
* Unfortunately, epoll always polls for EPOLLHUP so as long as the
* vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
* This gets worse if the client closes the TTY but doesn't exit.
* Therefore, the fd must be edge-triggered in the epoll-set so we
* only get the events once they change.
*/
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <termios.h>
#include <unistd.h>
#include "barrier.h"
#include "macro.h"
#include "ring.h"
#include "util.h"
#include "signal-util.h"
#include "pty.h"
#define PTY_BUFSIZE 4096
enum {
PTY_ROLE_UNKNOWN,
PTY_ROLE_PARENT,
PTY_ROLE_CHILD,
};
struct Pty {
unsigned long ref;
Barrier barrier;
int fd;
pid_t child;
sd_event_source *fd_source;
sd_event_source *child_source;
char in_buf[PTY_BUFSIZE];
Ring out_buf;
pty_event_t event_fn;
void *event_fn_userdata;
bool needs_requeue : 1;
unsigned int role : 2;
};
int pty_new(Pty **out) {
_pty_unref_ Pty *pty = NULL;
int r;
assert_return(out, -EINVAL);
pty = new0(Pty, 1);
if (!pty)
return -ENOMEM;
pty->ref = 1;
pty->fd = -1;
pty->barrier = (Barrier) BARRIER_NULL;
pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
if (pty->fd < 0)
return -errno;
/*
* The slave-node is initialized to uid/gid of the caller of
* posix_openpt(). Only if devpts is mounted with fixed uid/gid this is
* skipped. In that case, grantpt() can overwrite these, but then you
* have to be root to use chown() (or a pt_chown helper has to be
* present). In those cases grantpt() really does something,
* otherwise it's a no-op. We call grantpt() here to try supporting
* those cases, even though no-one uses that, I guess. If you need other
* access-rights, set them yourself after this call returns (no, this is
* not racy, it looks racy, but races regarding your own UID are never
* important as an attacker could ptrace you; and the slave-pty is also
* still locked).
*/
r = grantpt(pty->fd);
if (r < 0)
return -errno;
r = barrier_create(&pty->barrier);
if (r < 0)
return r;
*out = pty;
pty = NULL;
return 0;
}
Pty *pty_ref(Pty *pty) {
if (!pty || pty->ref < 1)
return NULL;
++pty->ref;
return pty;
}
Pty *pty_unref(Pty *pty) {
if (!pty || pty->ref < 1 || --pty->ref > 0)
return NULL;
pty_close(pty);
pty->child_source = sd_event_source_unref(pty->child_source);
barrier_destroy(&pty->barrier);
ring_clear(&pty->out_buf);
free(pty);
return NULL;
}
Barrier *pty_get_barrier(Pty *pty) {
assert(pty);
return &pty->barrier;
}
bool pty_is_unknown(Pty *pty) {
return pty && pty->role == PTY_ROLE_UNKNOWN;
}
bool pty_is_parent(Pty *pty) {
return pty && pty->role == PTY_ROLE_PARENT;
}
bool pty_is_child(Pty *pty) {
return pty && pty->role == PTY_ROLE_CHILD;
}
bool pty_has_child(Pty *pty) {
return pty_is_parent(pty) && pty->child > 0;
}
pid_t pty_get_child(Pty *pty) {
return pty_has_child(pty) ? pty->child : -ECHILD;
}
bool pty_is_open(Pty *pty) {
return pty && pty->fd >= 0;
}
int pty_get_fd(Pty *pty) {
assert_return(pty, -EINVAL);
return pty_is_open(pty) ? pty->fd : -EPIPE;
}
int pty_make_child(Pty *pty) {
_cleanup_free_ char *slave_name = NULL;
int r, fd;
assert_return(pty, -EINVAL);
assert_return(pty_is_unknown(pty), -EALREADY);
r = ptsname_malloc(pty->fd, &slave_name);
if (r < 0)
return -errno;
fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
if (fd < 0)
return -errno;
safe_close(pty->fd);
pty->fd = fd;
pty->child = getpid();
pty->role = PTY_ROLE_CHILD;
barrier_set_role(&pty->barrier, BARRIER_CHILD);
return 0;
}
int pty_make_parent(Pty *pty, pid_t child) {
assert_return(pty, -EINVAL);
assert_return(pty_is_unknown(pty), -EALREADY);
pty->child = child;
pty->role = PTY_ROLE_PARENT;
return 0;
}
int pty_unlock(Pty *pty) {
assert_return(pty, -EINVAL);
assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
assert_return(pty_is_open(pty), -ENODEV);
return unlockpt(pty->fd) < 0 ? -errno : 0;
}
int pty_setup_child(Pty *pty) {
struct termios attr;
pid_t pid;
int r;
assert_return(pty, -EINVAL);
assert_return(pty_is_child(pty), -EINVAL);
assert_return(pty_is_open(pty), -EALREADY);
r = reset_signal_mask();
if (r < 0)
return r;
r = reset_all_signal_handlers();
if (r < 0)
return r;
pid = setsid();
if (pid < 0 && errno != EPERM)
return -errno;
r = ioctl(pty->fd, TIOCSCTTY, 0);
if (r < 0)
return -errno;
r = tcgetattr(pty->fd, &attr);
if (r < 0)
return -errno;
/* erase character should be normal backspace, PLEASEEE! */
attr.c_cc[VERASE] = 010;
/* always set UTF8 flag */
attr.c_iflag |= IUTF8;
r = tcsetattr(pty->fd, TCSANOW, &attr);
if (r < 0)
return -errno;
if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
return -errno;
/* only close FD if it's not a std-fd */
pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
return 0;
}
void pty_close(Pty *pty) {
if (!pty_is_open(pty))
return;
pty->fd_source = sd_event_source_unref(pty->fd_source);
pty->fd = safe_close(pty->fd);
}
/*
* Drain input-queue and dispatch data via the event-handler. Returns <0 on
* error, 0 if queue is empty and 1 if we couldn't empty the input queue fast
* enough and there's still data left.
*/
static int pty_dispatch_read(Pty *pty) {
unsigned int i;
ssize_t len;
int r;
/*
* We're edge-triggered, means we need to read the whole queue. This,
* however, might cause us to stall if the writer is faster than we
* are. Therefore, try reading as much as 8 times (32KiB) and only
* bail out then.
*/
for (i = 0; i < 8; ++i) {
len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
if (len < 0) {
if (errno == EINTR)
continue;
return (errno == EAGAIN) ? 0 : -errno;
} else if (len == 0)
continue;
/* set terminating zero for debugging safety */
pty->in_buf[len] = 0;
r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, len);
if (r < 0)
return r;
}
/* still data left, make sure we're queued again */
pty->needs_requeue = true;
return 1;
}
/*
* Drain output-queue by writing data to the pty. Returns <0 on error, 0 if the
* output queue is empty now and 1 if we couldn't empty the output queue fast
* enough and there's still data left.
*/
static int pty_dispatch_write(Pty *pty) {
struct iovec vec[2];
unsigned int i;
ssize_t len;
size_t num;
/*
* Same as pty_dispatch_read(), we're edge-triggered so we need to call
* write() until either all data is written or it returns EAGAIN. We
* call it twice and if it still writes successfully, we reschedule.
*/
for (i = 0; i < 2; ++i) {
num = ring_peek(&pty->out_buf, vec);
if (num < 1)
return 0;
len = writev(pty->fd, vec, (int)num);
if (len < 0) {
if (errno == EINTR)
continue;
return (errno == EAGAIN) ? 1 : -errno;
} else if (len == 0)
continue;
ring_pull(&pty->out_buf, (size_t)len);
}
/* still data left, make sure we're queued again */
if (ring_get_size(&pty->out_buf) > 0) {
pty->needs_requeue = true;
return 1;
}
return 0;
}
static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Pty *pty = userdata;
int r_hup = 0, r_write = 0, r_read = 0, r;
/*
* Whenever we encounter I/O errors, we have to make sure to drain the
* input queue first, before we handle any HUP. A child might send us
* a message and immediately close the queue. We must not handle the
HUP first or we lose data.
* Therefore, if we read a message successfully, we always return
* success and wait for the next event-loop iteration. Furthermore,
* whenever there is a write-error, we must try reading from the input
* queue even if EPOLLIN is not set. The input might have arrived in
* between epoll_wait() and write(). Therefore, write-errors are only
* ever handled if the input-queue is empty. In all other cases they
* are ignored until either reading fails or the input queue is empty.
*/
if (revents & (EPOLLHUP | EPOLLERR))
r_hup = -EPIPE;
if (revents & EPOLLOUT)
r_write = pty_dispatch_write(pty);
/* Awesome! Kernel signals HUP without IN but the queues are not empty. */
if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
r_read = pty_dispatch_read(pty);
if (r_read > 0)
return 0; /* still data left to fetch next round */
}
if (r_hup < 0 || r_write < 0 || r_read < 0) {
/* PTY closed and input-queue drained */
pty_close(pty);
r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
if (r < 0)
return r;
}
return 0;
}
static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
Pty *pty = userdata;
int r;
if (pty->needs_requeue) {
/*
* We're edge-triggered. In case we couldn't handle all events
* or in case new write-data is queued, we set needs_requeue.
* Before going to sleep, we set the io-events *again*. sd-event
* notices that we're edge-triggered and forwards the call to
* the kernel even if the events didn't change. The kernel will
* check the events and re-queue us on the ready queue in case
* an event is pending.
*/
r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
if (r >= 0)
pty->needs_requeue = false;
}
return 0;
}
static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
Pty *pty = userdata;
int r;
pty->child = 0;
r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
if (r < 0)
return r;
return 0;
}
int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
int r;
assert_return(pty, -EINVAL);
assert_return(event, -EINVAL);
assert_return(event_fn, -EINVAL);
assert_return(pty_is_parent(pty), -EINVAL);
pty_detach_event(pty);
if (pty_is_open(pty)) {
r = sd_event_add_io(event,
&pty->fd_source,
pty->fd,
EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
pty_fd_fn,
pty);
if (r < 0)
goto error;
r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
if (r < 0)
goto error;
}
if (pty_has_child(pty)) {
r = sd_event_add_child(event,
&pty->child_source,
pty->child,
WEXITED,
pty_child_fn,
pty);
if (r < 0)
goto error;
}
pty->event_fn = event_fn;
pty->event_fn_userdata = event_fn_userdata;
return 0;
error:
pty_detach_event(pty);
return r;
}
void pty_detach_event(Pty *pty) {
if (!pty)
return;
pty->child_source = sd_event_source_unref(pty->child_source);
pty->fd_source = sd_event_source_unref(pty->fd_source);
pty->event_fn = NULL;
pty->event_fn_userdata = NULL;
}
int pty_write(Pty *pty, const void *buf, size_t size) {
bool was_empty;
int r;
assert_return(pty, -EINVAL);
assert_return(pty_is_open(pty), -ENODEV);
assert_return(pty_is_parent(pty), -ENODEV);
if (size < 1)
return 0;
/*
* Push @buf[0..@size] into the output ring-buffer. In case the
* ring-buffer wasn't empty beforehand, we're already waiting for
* EPOLLOUT and we're done. If it was empty, we have to re-queue the
* FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
* EPOLLOUT event.
*/
was_empty = ring_get_size(&pty->out_buf) < 1;
r = ring_push(&pty->out_buf, buf, size);
if (r < 0)
return r;
if (was_empty)
pty->needs_requeue = true;
return 0;
}
int pty_signal(Pty *pty, int sig) {
assert_return(pty, -EINVAL);
assert_return(pty_is_open(pty), -ENODEV);
assert_return(pty_is_parent(pty), -ENODEV);
return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
}
int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
struct winsize ws = {
.ws_col = term_width,
.ws_row = term_height,
};
assert_return(pty, -EINVAL);
assert_return(pty_is_open(pty), -ENODEV);
assert_return(pty_is_parent(pty), -ENODEV);
/*
* This will send SIGWINCH to the pty slave foreground process group.
* We will also get one, but we don't need it.
*/
return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
}
pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
_pty_unref_ Pty *pty = NULL;
int r;
pid_t pid;
assert_return(out, -EINVAL);
assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
r = pty_new(&pty);
if (r < 0)
return r;
r = pty_unlock(pty);
if (r < 0)
return r;
pid = fork();
if (pid < 0)
return -errno;
if (pid == 0) {
/* child */
r = pty_make_child(pty);
if (r < 0)
_exit(-r);
r = pty_setup_child(pty);
if (r < 0)
_exit(-r);
/* sync with parent */
if (!barrier_place_and_sync(&pty->barrier))
_exit(1);
/* fallthrough and return the child's PTY object */
} else {
/* parent */
r = pty_make_parent(pty, pid);
if (r < 0)
goto parent_error;
r = pty_resize(pty, initial_term_width, initial_term_height);
if (r < 0)
goto parent_error;
if (event) {
r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
if (r < 0)
goto parent_error;
}
/* sync with child */
if (!barrier_place_and_sync(&pty->barrier)) {
r = -ECHILD;
goto parent_error;
}
/* fallthrough and return the parent's PTY object */
}
*out = pty;
pty = NULL;
return pid;
parent_error:
barrier_abort(&pty->barrier);
waitpid(pty->child, NULL, 0);
pty->child = 0;
return r;
}
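/*
 * Illustrative sketch, not part of the original file: the same PTY dance that
 * pty_fork()/pty_make_child()/pty_setup_child() perform above, written against
 * plain libc only. The helper name and the echoed command are hypothetical and
 * error handling is reduced to a bare minimum for brevity.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t example_pty_fork(int *master_out) {
        const char *slave_name;
        int master, slave;
        pid_t pid;

        /* Open the PTY master and unlock the slave side, as pty_new() + pty_unlock() do. */
        master = posix_openpt(O_RDWR | O_NOCTTY);
        if (master < 0)
                return -1;
        if (grantpt(master) < 0 || unlockpt(master) < 0 || !(slave_name = ptsname(master))) {
                close(master);
                return -1;
        }

        pid = fork();
        if (pid < 0) {
                close(master);
                return -1;
        }

        if (pid == 0) {
                /* Child: open the slave, become session leader, acquire the
                 * controlling TTY and redirect stdio -- the core of
                 * pty_make_child() + pty_setup_child(). */
                slave = open(slave_name, O_RDWR | O_NOCTTY);
                if (slave < 0)
                        _exit(1);
                close(master);

                if (setsid() < 0 && errno != EPERM)
                        _exit(1);
                if (ioctl(slave, TIOCSCTTY, 0) < 0)
                        _exit(1);
                if (dup2(slave, STDIN_FILENO) < 0 ||
                    dup2(slave, STDOUT_FILENO) < 0 ||
                    dup2(slave, STDERR_FILENO) < 0)
                        _exit(1);
                if (slave > STDERR_FILENO)
                        close(slave);

                execlp("echo", "echo", "hello from the pty", (char*) NULL);
                _exit(1);
        }

        /* Parent keeps the master fd for I/O, like pty_make_parent(). */
        *master_out = master;
        return pid;
}

int main(void) {
        char buf[256];
        ssize_t n;
        int master;
        pid_t pid;

        pid = example_pty_fork(&master);
        if (pid < 0)
                return EXIT_FAILURE;

        /* Read whatever the child writes to its controlling TTY; read() fails
         * with EIO once the slave side is gone. */
        while ((n = read(master, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, (size_t) n, stdout);

        waitpid(pid, NULL, 0);
        close(master);
        return EXIT_SUCCESS;
}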

View File

@@ -1,72 +0,0 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#pragma once
/***
This file is part of systemd.
Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <stdbool.h>
#include <unistd.h>
#include "barrier.h"
#include "macro.h"
#include "sd-event.h"
typedef struct Pty Pty;
enum {
PTY_CHILD,
PTY_HUP,
PTY_DATA,
};
typedef int (*pty_event_t) (Pty *pty, void *userdata, unsigned int event, const void *ptr, size_t size);
int pty_new(Pty **out);
Pty *pty_ref(Pty *pty);
Pty *pty_unref(Pty *pty);
#define _pty_unref_ _cleanup_(pty_unrefp)
DEFINE_TRIVIAL_CLEANUP_FUNC(Pty*, pty_unref);
Barrier *pty_get_barrier(Pty *pty);
bool pty_is_unknown(Pty *pty);
bool pty_is_parent(Pty *pty);
bool pty_is_child(Pty *pty);
bool pty_has_child(Pty *pty);
pid_t pty_get_child(Pty *pty);
bool pty_is_open(Pty *pty);
int pty_get_fd(Pty *pty);
int pty_make_child(Pty *pty);
int pty_make_parent(Pty *pty, pid_t child);
int pty_unlock(Pty *pty);
int pty_setup_child(Pty *pty);
void pty_close(Pty *pty);
int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata);
void pty_detach_event(Pty *pty);
int pty_write(Pty *pty, const void *buf, size_t size);
int pty_signal(Pty *pty, int sig);
int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height);
pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height);

View File

@@ -1,142 +0,0 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <locale.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
#include "pty.h"
#include "util.h"
#include "signal-util.h"
static const char sndmsg[] = "message\n";
static const char rcvmsg[] = "message\r\n";
static char rcvbuf[128];
static size_t rcvsiz = 0;
static sd_event *event;
static void run_child(Pty *pty) {
ssize_t r, l;
char buf[512];
r = read(0, buf, sizeof(buf));
assert_se((size_t)r == strlen(sndmsg));
assert_se(!strncmp(buf, sndmsg, r));
l = write(1, buf, r);
assert_se(l == r);
}
static int pty_fn(Pty *pty, void *userdata, unsigned int ev, const void *ptr, size_t size) {
switch (ev) {
case PTY_DATA:
assert_se(rcvsiz < strlen(rcvmsg) * 2);
assert_se(rcvsiz + size < sizeof(rcvbuf));
memcpy(&rcvbuf[rcvsiz], ptr, size);
rcvsiz += size;
if (rcvsiz >= strlen(rcvmsg) * 2) {
assert_se(rcvsiz == strlen(rcvmsg) * 2);
assert_se(!memcmp(rcvbuf, rcvmsg, strlen(rcvmsg)));
assert_se(!memcmp(&rcvbuf[strlen(rcvmsg)], rcvmsg, strlen(rcvmsg)));
}
break;
case PTY_HUP:
/* This is guaranteed to appear _after_ the input queues are
* drained! */
assert_se(rcvsiz == strlen(rcvmsg) * 2);
break;
case PTY_CHILD:
/* this may appear at any time */
break;
default:
assert_se(0);
break;
}
/* if we got HUP _and_ CHILD, exit */
if (pty_get_fd(pty) < 0 && pty_get_child(pty) < 0)
sd_event_exit(event, 0);
return 0;
}
static void run_parent(Pty *pty) {
int r;
/* write message to pty, ECHO mode guarantees that we get it back
* twice: once via ECHO, once from the run_child() fn */
assert_se(pty_write(pty, sndmsg, strlen(sndmsg)) >= 0);
r = sd_event_loop(event);
assert_se(r >= 0);
}
static void test_pty(void) {
pid_t pid;
Pty *pty = NULL;
rcvsiz = 0;
zero(rcvbuf);
assert_se(sd_event_default(&event) >= 0);
pid = pty_fork(&pty, event, pty_fn, NULL, 80, 25);
assert_se(pid >= 0);
if (pid == 0) {
/* child */
run_child(pty);
exit(0);
}
/* parent */
run_parent(pty);
/* Make sure the PTY recycled the child; yeah, this is racy if the
* PID was already reused; but that seems fine for a test. */
assert_se(waitpid(pid, NULL, WNOHANG) < 0 && errno == ECHILD);
pty_unref(pty);
sd_event_unref(event);
}
int main(int argc, char *argv[]) {
unsigned int i;
log_parse_environment();
log_open();
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
/* Oh, there are ugly races in the TTY layer regarding HUP vs IN. Turns
* out they appear only 10% of the time. I fixed all of them and
* don't see them anymore. But let's be safe and run this 1000 times
* so we catch any new ones, in case they appear again. */
for (i = 0; i < 1000; ++i)
test_pty();
return 0;
}

View File

@@ -1,130 +0,0 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <string.h>
#include "def.h"
#include "ring.h"
static void test_ring(void) {
static const char buf[8192];
Ring r;
size_t l;
struct iovec vec[2];
int s;
zero(r);
l = ring_peek(&r, vec);
assert_se(l == 0);
s = ring_push(&r, buf, 2048);
assert_se(!s);
assert_se(ring_get_size(&r) == 2048);
l = ring_peek(&r, vec);
assert_se(l == 1);
assert_se(vec[0].iov_len == 2048);
assert_se(!memcmp(vec[0].iov_base, buf, vec[0].iov_len));
assert_se(ring_get_size(&r) == 2048);
ring_pull(&r, 2048);
assert_se(ring_get_size(&r) == 0);
l = ring_peek(&r, vec);
assert_se(l == 0);
assert_se(ring_get_size(&r) == 0);
s = ring_push(&r, buf, 2048);
assert_se(!s);
assert_se(ring_get_size(&r) == 2048);
l = ring_peek(&r, vec);
assert_se(l == 1);
assert_se(vec[0].iov_len == 2048);
assert_se(!memcmp(vec[0].iov_base, buf, vec[0].iov_len));
assert_se(ring_get_size(&r) == 2048);
s = ring_push(&r, buf, 1);
assert_se(!s);
assert_se(ring_get_size(&r) == 2049);
l = ring_peek(&r, vec);
assert_se(l == 2);
assert_se(vec[0].iov_len == 2048);
assert_se(vec[1].iov_len == 1);
assert_se(!memcmp(vec[0].iov_base, buf, vec[0].iov_len));
assert_se(!memcmp(vec[1].iov_base, buf, vec[1].iov_len));
assert_se(ring_get_size(&r) == 2049);
ring_pull(&r, 2048);
assert_se(ring_get_size(&r) == 1);
l = ring_peek(&r, vec);
assert_se(l == 1);
assert_se(vec[0].iov_len == 1);
assert_se(!memcmp(vec[0].iov_base, buf, vec[0].iov_len));
assert_se(ring_get_size(&r) == 1);
ring_pull(&r, 1);
assert_se(ring_get_size(&r) == 0);
s = ring_push(&r, buf, 2048);
assert_se(!s);
assert_se(ring_get_size(&r) == 2048);
s = ring_push(&r, buf, 2049);
assert_se(!s);
assert_se(ring_get_size(&r) == 4097);
l = ring_peek(&r, vec);
assert_se(l == 1);
assert_se(vec[0].iov_len == 4097);
assert_se(!memcmp(vec[0].iov_base, buf, vec[0].iov_len));
assert_se(ring_get_size(&r) == 4097);
ring_pull(&r, 1);
assert_se(ring_get_size(&r) == 4096);
s = ring_push(&r, buf, 4096);
assert_se(!s);
assert_se(ring_get_size(&r) == 8192);
l = ring_peek(&r, vec);
assert_se(l == 2);
assert_se(vec[0].iov_len == 8191);
assert_se(vec[1].iov_len == 1);
assert_se(!memcmp(vec[0].iov_base, buf, vec[0].iov_len));
assert_se(!memcmp(vec[1].iov_base, buf, vec[1].iov_len));
assert_se(ring_get_size(&r) == 8192);
ring_clear(&r);
assert_se(ring_get_size(&r) == 0);
}
int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
test_ring();
return 0;
}
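/*
 * Illustrative sketch, not part of the original file: a minimal fixed-size
 * ring showing why ring_peek() above may fill either one or two iovecs -- the
 * readable region is contiguous unless it wraps past the end of the backing
 * buffer. The real Ring grows dynamically; MiniRing and mini_ring_peek() are
 * hypothetical names, with a tiny fixed capacity to keep the wrap visible.
 */
#include <assert.h>
#include <string.h>
#include <sys/uio.h>

#define MINI_RING_CAPACITY 8

typedef struct {
        char buf[MINI_RING_CAPACITY];
        size_t start;   /* index of the first used byte */
        size_t used;    /* number of used bytes */
} MiniRing;

/* Fill @vec[0..1] with the readable region(s); returns the number of iovecs. */
static size_t mini_ring_peek(MiniRing *r, struct iovec vec[2]) {
        if (r->used == 0)
                return 0;

        if (r->start + r->used <= MINI_RING_CAPACITY) {
                /* Data does not wrap: one contiguous chunk. */
                vec[0].iov_base = r->buf + r->start;
                vec[0].iov_len = r->used;
                return 1;
        }

        /* Data wraps: tail of the buffer first, then the beginning. */
        vec[0].iov_base = r->buf + r->start;
        vec[0].iov_len = MINI_RING_CAPACITY - r->start;
        vec[1].iov_base = r->buf;
        vec[1].iov_len = r->used - vec[0].iov_len;
        return 2;
}

int main(void) {
        MiniRing r = { .start = 6, .used = 5 };
        struct iovec vec[2];

        memcpy(r.buf + 6, "ab", 2);   /* bytes 6..7 */
        memcpy(r.buf, "cde", 3);      /* wrapped bytes 0..2 */

        assert(mini_ring_peek(&r, vec) == 2);
        assert(vec[0].iov_len == 2 && memcmp(vec[0].iov_base, "ab", 2) == 0);
        assert(vec[1].iov_len == 3 && memcmp(vec[1].iov_base, "cde", 3) == 0);
        return 0;
}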

View File

@@ -131,7 +131,7 @@ int main(int argc, char *argv[]) {
        if (r < 0)
                log_warning_errno(r, "Failed to parse configuration file: %m");

-       log_debug("systemd-timesyncd running as pid %lu", (unsigned long) getpid());
+       log_debug("systemd-timesyncd running as pid " PID_FMT, getpid());

        sd_notify(false,
                  "READY=1\n"
                  "STATUS=Daemon is running");