Merge tag 'v5.17-rc5' into sched/core, to resolve conflicts
New conflicts in sched/core due to the following upstream fixes:

  44585f7bc0 ("psi: fix "defined but not used" warnings when CONFIG_PROC_FS=n")
  a06247c680 ("psi: Fix uaf issue when psi trigger is destroyed while being polled")

Conflicts:
	include/linux/psi_types.h
	kernel/sched/psi.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);

/* mark that this task has queued an async job, used by module init */
current->flags |= PF_USED_ASYNC;

/* schedule for execution */
queue_work_node(node, system_unbound_wq, &entry->work);

@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb)
/**
* kauditd_rehold_skb - Handle a audit record send failure in the hold queue
* @skb: audit record
* @error: error code (unused)
*
* Description:
* This should only be used by the kauditd_thread when it fails to flush the
* hold queue.
*/
static void kauditd_rehold_skb(struct sk_buff *skb)
static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error)
{
/* put the record back in the queue at the same place */
skb_queue_head(&audit_hold_queue, skb);
/* put the record back in the queue */
skb_queue_tail(&audit_hold_queue, skb);
}

/**
* kauditd_hold_skb - Queue an audit record, waiting for auditd
* @skb: audit record
* @error: error code
*
* Description:
* Queue the audit record, waiting for an instance of auditd. When this
@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
* and queue it, if we have room. If we want to hold on to the record, but we
* don't have room, record a record lost message.
*/
static void kauditd_hold_skb(struct sk_buff *skb)
static void kauditd_hold_skb(struct sk_buff *skb, int error)
{
/* at this point it is uncertain if we will ever send this to auditd so
* try to send the message via printk before we go any further */
kauditd_printk_skb(skb);

/* can we just silently drop the message? */
if (!audit_default) {
kfree_skb(skb);
return;
if (!audit_default)
goto drop;

/* the hold queue is only for when the daemon goes away completely,
* not -EAGAIN failures; if we are in a -EAGAIN state requeue the
* record on the retry queue unless it's full, in which case drop it
*/
if (error == -EAGAIN) {
if (!audit_backlog_limit ||
skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
skb_queue_tail(&audit_retry_queue, skb);
return;
}
audit_log_lost("kauditd retry queue overflow");
goto drop;
}

/* if we have room, queue the message */
/* if we have room in the hold queue, queue the message */
if (!audit_backlog_limit ||
skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
skb_queue_tail(&audit_hold_queue, skb);
@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb)

/* we have no other options - drop the message */
audit_log_lost("kauditd hold queue overflow");
drop:
kfree_skb(skb);
}

/**
* kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
* @skb: audit record
* @error: error code (unused)
*
* Description:
* Not as serious as kauditd_hold_skb() as we still have a connected auditd,
* but for some reason we are having problems sending it audit records so
* queue the given record and attempt to resend.
*/
static void kauditd_retry_skb(struct sk_buff *skb)
static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error)
{
/* NOTE: because records should only live in the retry queue for a
* short period of time, before either being sent or moved to the hold
* queue, we don't currently enforce a limit on this queue */
skb_queue_tail(&audit_retry_queue, skb);
if (!audit_backlog_limit ||
skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
skb_queue_tail(&audit_retry_queue, skb);
return;
}

/* we have to drop the record, send it via printk as a last effort */
kauditd_printk_skb(skb);
audit_log_lost("kauditd retry queue overflow");
kfree_skb(skb);
}

/**
@@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac)
/* flush the retry queue to the hold queue, but don't touch the main
* queue since we need to process that normally for multicast */
while ((skb = skb_dequeue(&audit_retry_queue)))
kauditd_hold_skb(skb);
kauditd_hold_skb(skb, -ECONNREFUSED);
}

/**
@@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
struct sk_buff_head *queue,
unsigned int retry_limit,
void (*skb_hook)(struct sk_buff *skb),
void (*err_hook)(struct sk_buff *skb))
void (*err_hook)(struct sk_buff *skb, int error))
{
int rc = 0;
struct sk_buff *skb;
struct sk_buff *skb = NULL;
struct sk_buff *skb_tail;
unsigned int failed = 0;

/* NOTE: kauditd_thread takes care of all our locking, we just use
* the netlink info passed to us (e.g. sk and portid) */

while ((skb = skb_dequeue(queue))) {
skb_tail = skb_peek_tail(queue);
while ((skb != skb_tail) && (skb = skb_dequeue(queue))) {
/* call the skb_hook for each skb we touch */
if (skb_hook)
(*skb_hook)(skb);
@@ -731,7 +755,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
/* can we send to anyone via unicast? */
if (!sk) {
if (err_hook)
(*err_hook)(skb);
(*err_hook)(skb, -ECONNREFUSED);
continue;
}

@@ -745,7 +769,7 @@ retry:
rc == -ECONNREFUSED || rc == -EPERM) {
sk = NULL;
if (err_hook)
(*err_hook)(skb);
(*err_hook)(skb, rc);
if (rc == -EAGAIN)
rc = 0;
/* continue to drain the queue */

@@ -185,7 +185,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
case AUDITSC_EXECVE:
return mask & AUDIT_PERM_EXEC;
case AUDITSC_OPENAT2:
return mask & ACC_MODE((u32)((struct open_how *)ctx->argv[2])->flags);
return mask & ACC_MODE((u32)ctx->openat2.flags);
default:
return 0;
}

@@ -207,7 +207,7 @@ BTF_ID(func, bpf_lsm_socket_socketpair)

BTF_ID(func, bpf_lsm_syslog)
BTF_ID(func, bpf_lsm_task_alloc)
BTF_ID(func, bpf_lsm_task_getsecid_subj)
BTF_ID(func, bpf_lsm_current_getsecid_subj)
BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)

@@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
}

rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
VM_MAP | VM_USERMAP, PAGE_KERNEL);
if (rb) {
kmemleak_not_leak(pages);
rb->pages = pages;

@@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
{
struct pt_regs *regs;
long res;
long res = -EINVAL;

if (!try_get_task_stack(task))
return -EFAULT;

regs = task_pt_regs(task);
res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
if (regs)
res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
put_task_stack(task);

return res;

@@ -550,11 +550,12 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
static void notrace inc_misses_counter(struct bpf_prog *prog)
{
struct bpf_prog_stats *stats;
unsigned int flags;

stats = this_cpu_ptr(prog->stats);
u64_stats_update_begin(&stats->syncp);
flags = u64_stats_update_begin_irqsave(&stats->syncp);
u64_stats_inc(&stats->misses);
u64_stats_update_end(&stats->syncp);
u64_stats_update_end_irqrestore(&stats->syncp, flags);
}

/* The logic is similar to bpf_prog_run(), but with an explicit

@@ -549,6 +549,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,

BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);

/*
* Release agent gets called with all capabilities,
* require capabilities to set release agent.
*/
if ((of->file->f_cred->user_ns != &init_user_ns) ||
!capable(CAP_SYS_ADMIN))
return -EPERM;

cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENODEV;
@@ -954,6 +962,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
/* Specifying two release agents is forbidden */
if (ctx->release_agent)
return invalfc(fc, "release_agent respecified");
/*
* Release agent gets called with all capabilities,
* require capabilities to set release agent.
*/
if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
return invalfc(fc, "Setting release_agent not allowed");
ctx->release_agent = param->string;
param->string = NULL;
break;

@@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
cgroup_get(cgrp);
cgroup_kn_unlock(of->kn);

/* Allow only one trigger per file descriptor */
if (ctx->psi.trigger) {
cgroup_put(cgrp);
return -EBUSY;
}

psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
new = psi_trigger_create(psi, buf, nbytes, res);
if (IS_ERR(new)) {
@@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
return PTR_ERR(new);
}

psi_trigger_replace(&ctx->psi.trigger, new);

smp_store_release(&ctx->psi.trigger, new);
cgroup_put(cgrp);

return nbytes;
@@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;

psi_trigger_replace(&ctx->psi.trigger, NULL);
psi_trigger_destroy(ctx->psi.trigger);
}

bool cgroup_psi_enabled(void)

@@ -590,6 +590,35 @@ static inline void free_cpuset(struct cpuset *cs)
|
||||
kfree(cs);
|
||||
}
|
||||
|
||||
/*
|
||||
* validate_change_legacy() - Validate conditions specific to legacy (v1)
|
||||
* behavior.
|
||||
*/
|
||||
static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cpuset *c, *par;
|
||||
int ret;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
/* Each of our child cpusets must be a subset of us */
|
||||
ret = -EBUSY;
|
||||
cpuset_for_each_child(c, css, cur)
|
||||
if (!is_cpuset_subset(c, trial))
|
||||
goto out;
|
||||
|
||||
/* On legacy hierarchy, we must be a subset of our parent cpuset. */
|
||||
ret = -EACCES;
|
||||
par = parent_cs(cur);
|
||||
if (par && !is_cpuset_subset(trial, par))
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* validate_change() - Used to validate that any proposed cpuset change
|
||||
* follows the structural rules for cpusets.
|
||||
@@ -614,20 +643,21 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cpuset *c, *par;
|
||||
int ret;
|
||||
|
||||
/* The checks don't apply to root cpuset */
|
||||
if (cur == &top_cpuset)
|
||||
return 0;
|
||||
int ret = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
par = parent_cs(cur);
|
||||
|
||||
/* On legacy hierarchy, we must be a subset of our parent cpuset. */
|
||||
ret = -EACCES;
|
||||
if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
|
||||
if (!is_in_v2_mode())
|
||||
ret = validate_change_legacy(cur, trial);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Remaining checks don't apply to root cpuset */
|
||||
if (cur == &top_cpuset)
|
||||
goto out;
|
||||
|
||||
par = parent_cs(cur);
|
||||
|
||||
/*
|
||||
* If either I or some sibling (!= me) is exclusive, we can't
|
||||
* overlap
|
||||
@@ -1175,9 +1205,7 @@ enum subparts_cmd {
|
||||
*
|
||||
* Because of the implicit cpu exclusive nature of a partition root,
|
||||
* cpumask changes that violates the cpu exclusivity rule will not be
|
||||
* permitted when checked by validate_change(). The validate_change()
|
||||
* function will also prevent any changes to the cpu list if it is not
|
||||
* a superset of children's cpu lists.
|
||||
* permitted when checked by validate_change().
|
||||
*/
|
||||
static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
struct cpumask *newmask,
|
||||
@@ -1522,10 +1550,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
|
||||
struct cpuset *sibling;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
percpu_rwsem_assert_held(&cpuset_rwsem);
|
||||
|
||||
/*
|
||||
* Check all its siblings and call update_cpumasks_hier()
|
||||
* if their use_parent_ecpus flag is set in order for them
|
||||
* to use the right effective_cpus value.
|
||||
*
|
||||
* The update_cpumasks_hier() function may sleep. So we have to
|
||||
* release the RCU read lock before calling it.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_child(sibling, pos_css, parent) {
|
||||
@@ -1533,8 +1566,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
|
||||
continue;
|
||||
if (!sibling->use_parent_ecpus)
|
||||
continue;
|
||||
if (!css_tryget_online(&sibling->css))
|
||||
continue;
|
||||
|
||||
rcu_read_unlock();
|
||||
update_cpumasks_hier(sibling, tmp);
|
||||
rcu_read_lock();
|
||||
css_put(&sibling->css);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
@@ -1607,8 +1645,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
||||
* Make sure that subparts_cpus is a subset of cpus_allowed.
|
||||
*/
|
||||
if (cs->nr_subparts_cpus) {
|
||||
cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
|
||||
cs->cpus_allowed);
|
||||
cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
|
||||
cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
|
||||
}
|
||||
spin_unlock_irq(&callback_lock);
|
||||
|
||||
@@ -665,21 +665,16 @@ EXPORT_SYMBOL(cred_fscmp);
|
||||
|
||||
int set_cred_ucounts(struct cred *new)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
const struct cred *old = task->real_cred;
|
||||
struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
|
||||
|
||||
if (new->user == old->user && new->user_ns == old->user_ns)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* This optimization is needed because alloc_ucounts() uses locks
|
||||
* for table lookups.
|
||||
*/
|
||||
if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
|
||||
if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid))
|
||||
return 0;
|
||||
|
||||
if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
|
||||
if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid)))
|
||||
return -EAGAIN;
|
||||
|
||||
new->ucounts = new_ucounts;
|
||||
|
||||
@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
|
||||
WRITE_ONCE(event->state, state);
|
||||
}
|
||||
|
||||
/*
|
||||
* UP store-release, load-acquire
|
||||
*/
|
||||
|
||||
#define __store_release(ptr, val) \
|
||||
do { \
|
||||
barrier(); \
|
||||
WRITE_ONCE(*(ptr), (val)); \
|
||||
} while (0)
|
||||
|
||||
#define __load_acquire(ptr) \
|
||||
({ \
|
||||
__unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
|
||||
barrier(); \
|
||||
___p; \
|
||||
})
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
|
||||
static inline bool
|
||||
@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
|
||||
return t->time;
|
||||
}
|
||||
|
||||
static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
|
||||
static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
|
||||
{
|
||||
struct perf_cgroup_info *info;
|
||||
u64 now;
|
||||
struct perf_cgroup_info *t;
|
||||
|
||||
now = perf_clock();
|
||||
|
||||
info = this_cpu_ptr(cgrp->info);
|
||||
|
||||
info->time += now - info->timestamp;
|
||||
info->timestamp = now;
|
||||
t = per_cpu_ptr(event->cgrp->info, event->cpu);
|
||||
if (!__load_acquire(&t->active))
|
||||
return t->time;
|
||||
now += READ_ONCE(t->timeoffset);
|
||||
return now;
|
||||
}
|
||||
|
||||
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
|
||||
static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
|
||||
{
|
||||
if (adv)
|
||||
info->time += now - info->timestamp;
|
||||
info->timestamp = now;
|
||||
/*
|
||||
* see update_context_time()
|
||||
*/
|
||||
WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
|
||||
}
|
||||
|
||||
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
|
||||
{
|
||||
struct perf_cgroup *cgrp = cpuctx->cgrp;
|
||||
struct cgroup_subsys_state *css;
|
||||
struct perf_cgroup_info *info;
|
||||
|
||||
if (cgrp) {
|
||||
u64 now = perf_clock();
|
||||
|
||||
for (css = &cgrp->css; css; css = css->parent) {
|
||||
cgrp = container_of(css, struct perf_cgroup, css);
|
||||
__update_cgrp_time(cgrp);
|
||||
info = this_cpu_ptr(cgrp->info);
|
||||
|
||||
__update_cgrp_time(info, now, true);
|
||||
if (final)
|
||||
__store_release(&info->active, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void update_cgrp_time_from_event(struct perf_event *event)
|
||||
{
|
||||
struct perf_cgroup_info *info;
|
||||
struct perf_cgroup *cgrp;
|
||||
|
||||
/*
|
||||
@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
|
||||
/*
|
||||
* Do not update time when cgroup is not active
|
||||
*/
|
||||
if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
|
||||
__update_cgrp_time(event->cgrp);
|
||||
if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
|
||||
info = this_cpu_ptr(event->cgrp->info);
|
||||
__update_cgrp_time(info, perf_clock(), true);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
|
||||
for (css = &cgrp->css; css; css = css->parent) {
|
||||
cgrp = container_of(css, struct perf_cgroup, css);
|
||||
info = this_cpu_ptr(cgrp->info);
|
||||
info->timestamp = ctx->timestamp;
|
||||
__update_cgrp_time(info, ctx->timestamp, false);
|
||||
__store_release(&info->active, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -802,7 +839,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
|
||||
*/
|
||||
static void perf_cgroup_switch(struct task_struct *task, int mode)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct perf_cpu_context *cpuctx, *tmp;
|
||||
struct list_head *list;
|
||||
unsigned long flags;
|
||||
|
||||
@@ -813,7 +850,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
|
||||
local_irq_save(flags);
|
||||
|
||||
list = this_cpu_ptr(&cgrp_cpuctx_list);
|
||||
list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
|
||||
list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
|
||||
WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
|
||||
|
||||
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
|
||||
@@ -981,14 +1018,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void
|
||||
perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
|
||||
{
|
||||
struct perf_cgroup_info *t;
|
||||
t = per_cpu_ptr(event->cgrp->info, event->cpu);
|
||||
event->shadow_ctx_time = now - t->timestamp;
|
||||
}
|
||||
|
||||
static inline void
|
||||
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
|
||||
{
|
||||
@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
|
||||
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
|
||||
bool final)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
|
||||
static inline u64 perf_cgroup_event_time(struct perf_event *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 perf_cgroup_event_time(struct perf_event *event)
|
||||
static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
|
||||
/*
|
||||
* Update the record of the current time in a context.
|
||||
*/
|
||||
static void update_context_time(struct perf_event_context *ctx)
|
||||
static void __update_context_time(struct perf_event_context *ctx, bool adv)
|
||||
{
|
||||
u64 now = perf_clock();
|
||||
|
||||
ctx->time += now - ctx->timestamp;
|
||||
if (adv)
|
||||
ctx->time += now - ctx->timestamp;
|
||||
ctx->timestamp = now;
|
||||
|
||||
/*
|
||||
* The above: time' = time + (now - timestamp), can be re-arranged
|
||||
* into: time` = now + (time - timestamp), which gives a single value
|
||||
* offset to compute future time without locks on.
|
||||
*
|
||||
* See perf_event_time_now(), which can be used from NMI context where
|
||||
* it's (obviously) not possible to acquire ctx->lock in order to read
|
||||
* both the above values in a consistent manner.
|
||||
*/
|
||||
WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
|
||||
}
|
||||
|
||||
static void update_context_time(struct perf_event_context *ctx)
|
||||
{
|
||||
__update_context_time(ctx, true);
|
||||
}
|
||||
|
||||
static u64 perf_event_time(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_context *ctx = event->ctx;
|
||||
|
||||
if (unlikely(!ctx))
|
||||
return 0;
|
||||
|
||||
if (is_cgroup_event(event))
|
||||
return perf_cgroup_event_time(event);
|
||||
|
||||
return ctx ? ctx->time : 0;
|
||||
return ctx->time;
|
||||
}
|
||||
|
||||
static u64 perf_event_time_now(struct perf_event *event, u64 now)
|
||||
{
|
||||
struct perf_event_context *ctx = event->ctx;
|
||||
|
||||
if (unlikely(!ctx))
|
||||
return 0;
|
||||
|
||||
if (is_cgroup_event(event))
|
||||
return perf_cgroup_event_time_now(event, now);
|
||||
|
||||
if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
|
||||
return ctx->time;
|
||||
|
||||
now += READ_ONCE(ctx->timeoffset);
|
||||
return now;
|
||||
}
|
||||
|
||||
static enum event_type_t get_event_type(struct perf_event *event)
|
||||
@@ -2350,7 +2417,7 @@ __perf_remove_from_context(struct perf_event *event,
|
||||
|
||||
if (ctx->is_active & EVENT_TIME) {
|
||||
update_context_time(ctx);
|
||||
update_cgrp_time_from_cpuctx(cpuctx);
|
||||
update_cgrp_time_from_cpuctx(cpuctx, false);
|
||||
}
|
||||
|
||||
event_sched_out(event, cpuctx, ctx);
|
||||
@@ -2361,6 +2428,9 @@ __perf_remove_from_context(struct perf_event *event,
|
||||
list_del_event(event, ctx);
|
||||
|
||||
if (!ctx->nr_events && ctx->is_active) {
|
||||
if (ctx == &cpuctx->ctx)
|
||||
update_cgrp_time_from_cpuctx(cpuctx, true);
|
||||
|
||||
ctx->is_active = 0;
|
||||
ctx->rotate_necessary = 0;
|
||||
if (ctx->task) {
|
||||
@@ -2392,7 +2462,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
|
||||
* event_function_call() user.
|
||||
*/
|
||||
raw_spin_lock_irq(&ctx->lock);
|
||||
if (!ctx->is_active) {
|
||||
/*
|
||||
* Cgroup events are per-cpu events, and must IPI because of
|
||||
* cgrp_cpuctx_list.
|
||||
*/
|
||||
if (!ctx->is_active && !is_cgroup_event(event)) {
|
||||
__perf_remove_from_context(event, __get_cpu_context(ctx),
|
||||
ctx, (void *)flags);
|
||||
raw_spin_unlock_irq(&ctx->lock);
|
||||
@@ -2482,40 +2556,6 @@ void perf_event_disable_inatomic(struct perf_event *event)
|
||||
irq_work_queue(&event->pending);
|
||||
}
|
||||
|
||||
static void perf_set_shadow_time(struct perf_event *event,
|
||||
struct perf_event_context *ctx)
|
||||
{
|
||||
/*
|
||||
* use the correct time source for the time snapshot
|
||||
*
|
||||
* We could get by without this by leveraging the
|
||||
* fact that to get to this function, the caller
|
||||
* has most likely already called update_context_time()
|
||||
* and update_cgrp_time_xx() and thus both timestamp
|
||||
* are identical (or very close). Given that tstamp is,
|
||||
* already adjusted for cgroup, we could say that:
|
||||
* tstamp - ctx->timestamp
|
||||
* is equivalent to
|
||||
* tstamp - cgrp->timestamp.
|
||||
*
|
||||
* Then, in perf_output_read(), the calculation would
|
||||
* work with no changes because:
|
||||
* - event is guaranteed scheduled in
|
||||
* - no scheduled out in between
|
||||
* - thus the timestamp would be the same
|
||||
*
|
||||
* But this is a bit hairy.
|
||||
*
|
||||
* So instead, we have an explicit cgroup call to remain
|
||||
* within the time source all along. We believe it
|
||||
* is cleaner and simpler to understand.
|
||||
*/
|
||||
if (is_cgroup_event(event))
|
||||
perf_cgroup_set_shadow_time(event, event->tstamp);
|
||||
else
|
||||
event->shadow_ctx_time = event->tstamp - ctx->timestamp;
|
||||
}
|
||||
|
||||
#define MAX_INTERRUPTS (~0ULL)
|
||||
|
||||
static void perf_log_throttle(struct perf_event *event, int enable);
|
||||
@@ -2556,8 +2596,6 @@ event_sched_in(struct perf_event *event,
|
||||
|
||||
perf_pmu_disable(event->pmu);
|
||||
|
||||
perf_set_shadow_time(event, ctx);
|
||||
|
||||
perf_log_itrace_start(event);
|
||||
|
||||
if (event->pmu->add(event, PERF_EF_START)) {
|
||||
@@ -2861,11 +2899,14 @@ perf_install_in_context(struct perf_event_context *ctx,
|
||||
* perf_event_attr::disabled events will not run and can be initialized
|
||||
* without IPI. Except when this is the first event for the context, in
|
||||
* that case we need the magic of the IPI to set ctx->is_active.
|
||||
* Similarly, cgroup events for the context also needs the IPI to
|
||||
* manipulate the cgrp_cpuctx_list.
|
||||
*
|
||||
* The IOC_ENABLE that is sure to follow the creation of a disabled
|
||||
* event will issue the IPI and reprogram the hardware.
|
||||
*/
|
||||
if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
|
||||
if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF &&
|
||||
ctx->nr_events && !is_cgroup_event(event)) {
|
||||
raw_spin_lock_irq(&ctx->lock);
|
||||
if (ctx->task == TASK_TOMBSTONE) {
|
||||
raw_spin_unlock_irq(&ctx->lock);
|
||||
@@ -3197,6 +3238,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp,
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy event-type-independent attributes that may be modified.
|
||||
*/
|
||||
static void perf_event_modify_copy_attr(struct perf_event_attr *to,
|
||||
const struct perf_event_attr *from)
|
||||
{
|
||||
to->sig_data = from->sig_data;
|
||||
}
|
||||
|
||||
static int perf_event_modify_attr(struct perf_event *event,
|
||||
struct perf_event_attr *attr)
|
||||
{
|
||||
@@ -3219,10 +3269,17 @@ static int perf_event_modify_attr(struct perf_event *event,
|
||||
WARN_ON_ONCE(event->ctx->parent_ctx);
|
||||
|
||||
mutex_lock(&event->child_mutex);
|
||||
/*
|
||||
* Event-type-independent attributes must be copied before event-type
|
||||
* modification, which will validate that final attributes match the
|
||||
* source attributes after all relevant attributes have been copied.
|
||||
*/
|
||||
perf_event_modify_copy_attr(&event->attr, attr);
|
||||
err = func(event, attr);
|
||||
if (err)
|
||||
goto out;
|
||||
list_for_each_entry(child, &event->child_list, child_list) {
|
||||
perf_event_modify_copy_attr(&child->attr, attr);
|
||||
err = func(child, attr);
|
||||
if (err)
|
||||
goto out;
|
||||
@@ -3251,16 +3308,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
|
||||
return;
|
||||
}
|
||||
|
||||
ctx->is_active &= ~event_type;
|
||||
if (!(ctx->is_active & EVENT_ALL))
|
||||
ctx->is_active = 0;
|
||||
|
||||
if (ctx->task) {
|
||||
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
|
||||
if (!ctx->is_active)
|
||||
cpuctx->task_ctx = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Always update time if it was set; not only when it changes.
|
||||
* Otherwise we can 'forget' to update time for any but the last
|
||||
@@ -3274,7 +3321,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
|
||||
if (is_active & EVENT_TIME) {
|
||||
/* update (and stop) ctx time */
|
||||
update_context_time(ctx);
|
||||
update_cgrp_time_from_cpuctx(cpuctx);
|
||||
update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
|
||||
/*
|
||||
* CPU-release for the below ->is_active store,
|
||||
* see __load_acquire() in perf_event_time_now()
|
||||
*/
|
||||
barrier();
|
||||
}
|
||||
|
||||
ctx->is_active &= ~event_type;
|
||||
if (!(ctx->is_active & EVENT_ALL))
|
||||
ctx->is_active = 0;
|
||||
|
||||
if (ctx->task) {
|
||||
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
|
||||
if (!ctx->is_active)
|
||||
cpuctx->task_ctx = NULL;
|
||||
}
|
||||
|
||||
is_active ^= ctx->is_active; /* changed bits */
|
||||
@@ -3711,13 +3773,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Because the userpage is strictly per-event (there is no concept of context,
|
||||
* so there cannot be a context indirection), every userpage must be updated
|
||||
* when context time starts :-(
|
||||
*
|
||||
* IOW, we must not miss EVENT_TIME edges.
|
||||
*/
|
||||
static inline bool event_update_userpage(struct perf_event *event)
|
||||
{
|
||||
if (likely(!atomic_read(&event->mmap_count)))
|
||||
return false;
|
||||
|
||||
perf_event_update_time(event);
|
||||
perf_set_shadow_time(event, event->ctx);
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
return true;
|
||||
@@ -3801,13 +3869,23 @@ ctx_sched_in(struct perf_event_context *ctx,
|
||||
struct task_struct *task)
|
||||
{
|
||||
int is_active = ctx->is_active;
|
||||
u64 now;
|
||||
|
||||
lockdep_assert_held(&ctx->lock);
|
||||
|
||||
if (likely(!ctx->nr_events))
|
||||
return;
|
||||
|
||||
if (is_active ^ EVENT_TIME) {
|
||||
/* start ctx time */
|
||||
__update_context_time(ctx, false);
|
||||
perf_cgroup_set_timestamp(task, ctx);
|
||||
/*
|
||||
* CPU-release for the below ->is_active store,
|
||||
* see __load_acquire() in perf_event_time_now()
|
||||
*/
|
||||
barrier();
|
||||
}
|
||||
|
||||
ctx->is_active |= (event_type | EVENT_TIME);
|
||||
if (ctx->task) {
|
||||
if (!is_active)
|
||||
@@ -3818,13 +3896,6 @@ ctx_sched_in(struct perf_event_context *ctx,
|
||||
|
||||
is_active ^= ctx->is_active; /* changed bits */
|
||||
|
||||
if (is_active & EVENT_TIME) {
|
||||
/* start ctx time */
|
||||
now = perf_clock();
|
||||
ctx->timestamp = now;
|
||||
perf_cgroup_set_timestamp(task, ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* First go through the list and put on any pinned groups
|
||||
* in order to give them the best chance of going on.
|
||||
@@ -4418,6 +4489,18 @@ static inline u64 perf_event_count(struct perf_event *event)
|
||||
return local64_read(&event->count) + atomic64_read(&event->child_count);
|
||||
}
|
||||
|
||||
static void calc_timer_values(struct perf_event *event,
|
||||
u64 *now,
|
||||
u64 *enabled,
|
||||
u64 *running)
|
||||
{
|
||||
u64 ctx_time;
|
||||
|
||||
*now = perf_clock();
|
||||
ctx_time = perf_event_time_now(event, *now);
|
||||
__perf_update_times(event, ctx_time, enabled, running);
|
||||
}
|
||||
|
||||
/*
|
||||
* NMI-safe method to read a local event, that is an event that
|
||||
* is:
|
||||
@@ -4477,10 +4560,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
|
||||
|
||||
*value = local64_read(&event->count);
|
||||
if (enabled || running) {
|
||||
u64 now = event->shadow_ctx_time + perf_clock();
|
||||
u64 __enabled, __running;
|
||||
u64 __enabled, __running, __now;;
|
||||
|
||||
__perf_update_times(event, now, &__enabled, &__running);
|
||||
calc_timer_values(event, &__now, &__enabled, &__running);
|
||||
if (enabled)
|
||||
*enabled = __enabled;
|
||||
if (running)
|
||||
@@ -5802,18 +5884,6 @@ static int perf_event_index(struct perf_event *event)
|
||||
return event->pmu->event_idx(event);
|
||||
}
|
||||
|
||||
static void calc_timer_values(struct perf_event *event,
|
||||
u64 *now,
|
||||
u64 *enabled,
|
||||
u64 *running)
|
||||
{
|
||||
u64 ctx_time;
|
||||
|
||||
*now = perf_clock();
|
||||
ctx_time = event->shadow_ctx_time + *now;
|
||||
__perf_update_times(event, ctx_time, enabled, running);
|
||||
}
|
||||
|
||||
static void perf_event_init_userpage(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_mmap_page *userpg;
|
||||
@@ -5938,6 +6008,8 @@ static void ring_buffer_attach(struct perf_event *event,
|
||||
struct perf_buffer *old_rb = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON_ONCE(event->parent);
|
||||
|
||||
if (event->rb) {
|
||||
/*
|
||||
* Should be impossible, we set this when removing
|
||||
@@ -5995,6 +6067,9 @@ static void ring_buffer_wakeup(struct perf_event *event)
|
||||
{
|
||||
struct perf_buffer *rb;
|
||||
|
||||
if (event->parent)
|
||||
event = event->parent;
|
||||
|
||||
rcu_read_lock();
|
||||
rb = rcu_dereference(event->rb);
|
||||
if (rb) {
|
||||
@@ -6008,6 +6083,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event)
|
||||
{
|
||||
struct perf_buffer *rb;
|
||||
|
||||
if (event->parent)
|
||||
event = event->parent;
|
||||
|
||||
rcu_read_lock();
|
||||
rb = rcu_dereference(event->rb);
|
||||
if (rb) {
|
||||
@@ -6353,7 +6431,6 @@ accounting:
|
||||
ring_buffer_attach(event, rb);
|
||||
|
||||
perf_event_update_time(event);
|
||||
perf_set_shadow_time(event, event->ctx);
|
||||
perf_event_init_userpage(event);
|
||||
perf_event_update_userpage(event);
|
||||
} else {
|
||||
@@ -6717,7 +6794,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event,
|
||||
if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
|
||||
goto out;
|
||||
|
||||
rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
|
||||
rb = ring_buffer_get(sampler);
|
||||
if (!rb)
|
||||
goto out;
|
||||
|
||||
@@ -6783,7 +6860,7 @@ static void perf_aux_sample_output(struct perf_event *event,
|
||||
if (WARN_ON_ONCE(!sampler || !data->aux_size))
|
||||
return;
|
||||
|
||||
rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
|
||||
rb = ring_buffer_get(sampler);
|
||||
if (!rb)
|
||||
return;
|
||||
|
||||
|
||||
@@ -2021,18 +2021,18 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
|
||||
#endif
|
||||
retval = copy_creds(p, clone_flags);
|
||||
if (retval < 0)
|
||||
goto bad_fork_free;
|
||||
|
||||
retval = -EAGAIN;
|
||||
if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
|
||||
if (p->real_cred->user != INIT_USER &&
|
||||
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
|
||||
goto bad_fork_free;
|
||||
goto bad_fork_cleanup_count;
|
||||
}
|
||||
current->flags &= ~PF_NPROC_EXCEEDED;
|
||||
|
||||
retval = copy_creds(p, clone_flags);
|
||||
if (retval < 0)
|
||||
goto bad_fork_free;
|
||||
|
||||
/*
|
||||
* If multiple threads are within copy_process(), then this check
|
||||
* triggers too late. This doesn't hurt, the check is only there
|
||||
@@ -2266,6 +2266,17 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
if (retval)
|
||||
goto bad_fork_put_pidfd;
|
||||
|
||||
/*
|
||||
* Now that the cgroups are pinned, re-clone the parent cgroup and put
|
||||
* the new task on the correct runqueue. All this *before* the task
|
||||
* becomes visible.
|
||||
*
|
||||
* This isn't part of ->can_fork() because while the re-cloning is
|
||||
* cgroup specific, it unconditionally needs to place the task on a
|
||||
* runqueue.
|
||||
*/
|
||||
sched_cgroup_fork(p, args);
|
||||
|
||||
/*
|
||||
* From this point on we must avoid any synchronous user-space
|
||||
* communication until we take the tasklist-lock. In particular, we do
|
||||
@@ -2323,10 +2334,6 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
goto bad_fork_cancel_cgroup;
|
||||
}
|
||||
|
||||
/* past the last point of failure */
|
||||
if (pidfile)
|
||||
fd_install(pidfd, pidfile);
|
||||
|
||||
init_task_pid_links(p);
|
||||
if (likely(p->pid)) {
|
||||
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
|
||||
@@ -2375,8 +2382,11 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
syscall_tracepoint_update(p);
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
|
||||
if (pidfile)
|
||||
fd_install(pidfd, pidfile);
|
||||
|
||||
proc_fork_connector(p);
|
||||
sched_post_fork(p, args);
|
||||
sched_post_fork(p);
|
||||
cgroup_post_fork(p, args);
|
||||
perf_event_fork(p);
|
||||
|
||||
|
||||
@@ -3462,7 +3462,7 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
|
||||
u16 chain_hlock = chain_hlocks[chain->base + i];
|
||||
unsigned int class_idx = chain_hlock_class_idx(chain_hlock);
|
||||
|
||||
return lock_classes + class_idx - 1;
|
||||
return lock_classes + class_idx;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3530,7 +3530,7 @@ static void print_chain_keys_chain(struct lock_chain *chain)
|
||||
hlock_id = chain_hlocks[chain->base + i];
|
||||
chain_key = print_chain_key_iteration(hlock_id, chain_key);
|
||||
|
||||
print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1);
|
||||
print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id));
|
||||
printk("\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3725,12 +3725,6 @@ static noinline int do_init_module(struct module *mod)
|
||||
}
|
||||
freeinit->module_init = mod->init_layout.base;
|
||||
|
||||
/*
|
||||
* We want to find out whether @mod uses async during init. Clear
|
||||
* PF_USED_ASYNC. async_schedule*() will set it.
|
||||
*/
|
||||
current->flags &= ~PF_USED_ASYNC;
|
||||
|
||||
do_mod_ctors(mod);
|
||||
/* Start the module */
|
||||
if (mod->init != NULL)
|
||||
@@ -3756,22 +3750,13 @@ static noinline int do_init_module(struct module *mod)
|
||||
|
||||
/*
|
||||
* We need to finish all async code before the module init sequence
|
||||
* is done. This has potential to deadlock. For example, a newly
|
||||
* detected block device can trigger request_module() of the
|
||||
* default iosched from async probing task. Once userland helper
|
||||
* reaches here, async_synchronize_full() will wait on the async
|
||||
* task waiting on request_module() and deadlock.
|
||||
* is done. This has potential to deadlock if synchronous module
|
||||
* loading is requested from async (which is not allowed!).
|
||||
*
|
||||
* This deadlock is avoided by perfomring async_synchronize_full()
|
||||
* iff module init queued any async jobs. This isn't a full
|
||||
* solution as it will deadlock the same if module loading from
|
||||
* async jobs nests more than once; however, due to the various
|
||||
* constraints, this hack seems to be the best option for now.
|
||||
* Please refer to the following thread for details.
|
||||
*
|
||||
* http://thread.gmane.org/gmane.linux.kernel/1420814
|
||||
* See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous
|
||||
* request_module() from async workers") for more details.
|
||||
*/
|
||||
if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC))
|
||||
if (!mod->async_probe_requested)
|
||||
async_synchronize_full();
|
||||
|
||||
ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base +
|
||||
|
||||
@@ -250,6 +250,7 @@ void module_decompress_cleanup(struct load_info *info)
|
||||
info->max_pages = info->used_pages = 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
static ssize_t compression_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
@@ -269,3 +270,4 @@ static int __init module_decompress_sysfs_init(void)
|
||||
return 0;
|
||||
}
|
||||
late_initcall(module_decompress_sysfs_init);
|
||||
#endif
|
||||
|
||||
@@ -504,7 +504,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA;
|
||||
if (!pm_wakeup_irq())
|
||||
return -ENODATA;
|
||||
|
||||
return sprintf(buf, "%u\n", pm_wakeup_irq());
|
||||
}
|
||||
|
||||
power_attr_ro(pm_wakeup_irq);
|
||||
|
||||
@@ -134,7 +134,7 @@ int freeze_processes(void)
|
||||
if (!pm_freezing)
|
||||
atomic_inc(&system_freezing_cnt);
|
||||
|
||||
pm_wakeup_clear(true);
|
||||
pm_wakeup_clear(0);
|
||||
pr_info("Freezing user space processes ... ");
|
||||
pm_freezing = true;
|
||||
error = try_to_freeze_tasks(true);
|
||||
|
||||
@@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm)
|
||||
* Register a range of page frames the contents of which should not be saved
|
||||
* during hibernation (to be used in the early initialization code).
|
||||
*/
|
||||
void __init __register_nosave_region(unsigned long start_pfn,
|
||||
unsigned long end_pfn, int use_kmalloc)
|
||||
void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
struct nosave_region *region;
|
||||
|
||||
@@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn,
|
||||
goto Report;
|
||||
}
|
||||
}
|
||||
if (use_kmalloc) {
|
||||
/* During init, this shouldn't fail */
|
||||
region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
|
||||
BUG_ON(!region);
|
||||
} else {
|
||||
/* This allocation cannot fail */
|
||||
region = memblock_alloc(sizeof(struct nosave_region),
|
||||
SMP_CACHE_BYTES);
|
||||
if (!region)
|
||||
panic("%s: Failed to allocate %zu bytes\n", __func__,
|
||||
sizeof(struct nosave_region));
|
||||
}
|
||||
/* This allocation cannot fail */
|
||||
region = memblock_alloc(sizeof(struct nosave_region),
|
||||
SMP_CACHE_BYTES);
|
||||
if (!region)
|
||||
panic("%s: Failed to allocate %zu bytes\n", __func__,
|
||||
sizeof(struct nosave_region));
|
||||
region->start_pfn = start_pfn;
|
||||
region->end_pfn = end_pfn;
|
||||
list_add_tail(®ion->list, &nosave_regions);
|
||||
|
||||
@@ -136,8 +136,6 @@ static void s2idle_loop(void)
|
||||
break;
|
||||
}
|
||||
|
||||
pm_wakeup_clear(false);
|
||||
|
||||
s2idle_enter();
|
||||
}
|
||||
|
||||
|
||||
@@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct wakelock *wl;
|
||||
char *str = buf;
|
||||
char *end = buf + PAGE_SIZE;
|
||||
int len = 0;
|
||||
|
||||
mutex_lock(&wakelocks_lock);
|
||||
|
||||
for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) {
|
||||
wl = rb_entry(node, struct wakelock, node);
|
||||
if (wl->ws->active == show_active)
|
||||
str += scnprintf(str, end - str, "%s ", wl->name);
|
||||
len += sysfs_emit_at(buf, len, "%s ", wl->name);
|
||||
}
|
||||
if (str > buf)
|
||||
str--;
|
||||
|
||||
str += scnprintf(str, end - str, "\n");
|
||||
len += sysfs_emit_at(buf, len, "\n");
|
||||
|
||||
mutex_unlock(&wakelocks_lock);
|
||||
return (str - buf);
|
||||
return len;
|
||||
}
|
||||
|
||||
#if CONFIG_PM_WAKELOCKS_LIMIT > 0
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
static const int ten_thousand = 10000;
|
||||
|
||||
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
void *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
if (write && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \
|
||||
.call_func = call, \
|
||||
.rtpcpu = &rt_name ## __percpu, \
|
||||
.name = n, \
|
||||
.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \
|
||||
.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \
|
||||
.percpu_enqueue_lim = 1, \
|
||||
.percpu_dequeue_lim = 1, \
|
||||
.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
|
||||
@@ -216,6 +216,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
int lim;
|
||||
int shift;
|
||||
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rcu_task_enqueue_lim < 0) {
|
||||
@@ -229,7 +230,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
|
||||
|
||||
if (lim > nr_cpu_ids)
|
||||
lim = nr_cpu_ids;
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
|
||||
shift = ilog2(nr_cpu_ids / lim);
|
||||
if (((nr_cpu_ids - 1) >> shift) >= lim)
|
||||
shift++;
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
|
||||
WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
|
||||
smp_store_release(&rtp->percpu_enqueue_lim, lim);
|
||||
for_each_possible_cpu(cpu) {
|
||||
@@ -298,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
|
||||
if (unlikely(needadjust)) {
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
|
||||
WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
|
||||
smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
|
||||
pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
|
||||
@@ -413,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
|
||||
if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rtp->percpu_enqueue_lim > 1) {
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
|
||||
smp_store_release(&rtp->percpu_enqueue_lim, 1);
|
||||
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
|
||||
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
|
||||
|
||||
@@ -4426,6 +4426,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
|
||||
init_entity_runnable_average(&p->se);
|
||||
|
||||
|
||||
#ifdef CONFIG_SCHED_INFO
|
||||
if (likely(sched_info_on()))
|
||||
memset(&p->sched_info, 0, sizeof(p->sched_info));
|
||||
@@ -4441,18 +4442,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
||||
void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
||||
{
|
||||
unsigned long flags;
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
struct task_group *tg;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Because we're not yet on the pid-hash, p->pi_lock isn't strictly
|
||||
* required yet, but lockdep gets upset if rules are violated.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
|
||||
struct task_group, css);
|
||||
p->sched_task_group = autogroup_task_group(p, tg);
|
||||
if (1) {
|
||||
struct task_group *tg;
|
||||
tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
|
||||
struct task_group, css);
|
||||
tg = autogroup_task_group(p, tg);
|
||||
p->sched_task_group = tg;
|
||||
}
|
||||
#endif
|
||||
rseq_migrate(p);
|
||||
/*
|
||||
@@ -4463,7 +4469,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
||||
if (p->sched_class->task_fork)
|
||||
p->sched_class->task_fork(p);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
}
|
||||
|
||||
void sched_post_fork(struct task_struct *p)
|
||||
{
|
||||
uclamp_post_fork(p);
|
||||
}
|
||||
|
||||
@@ -5824,8 +5833,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
}
|
||||
|
||||
if (schedstat_enabled() && rq->core->core_forceidle_count) {
|
||||
if (cookie)
|
||||
rq->core->core_forceidle_start = rq_clock(rq->core);
|
||||
rq->core->core_forceidle_start = rq_clock(rq->core);
|
||||
rq->core->core_forceidle_occupation = occ;
|
||||
}
|
||||
|
||||
@@ -8139,9 +8147,7 @@ int __cond_resched_lock(spinlock_t *lock)
|
||||
|
||||
if (spin_needbreak(lock) || resched) {
|
||||
spin_unlock(lock);
|
||||
if (resched)
|
||||
preempt_schedule_common();
|
||||
else
|
||||
if (!_cond_resched())
|
||||
cpu_relax();
|
||||
ret = 1;
|
||||
spin_lock(lock);
|
||||
@@ -8159,9 +8165,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock)
|
||||
|
||||
if (rwlock_needbreak(lock) || resched) {
|
||||
read_unlock(lock);
|
||||
if (resched)
|
||||
preempt_schedule_common();
|
||||
else
|
||||
if (!_cond_resched())
|
||||
cpu_relax();
|
||||
ret = 1;
|
||||
read_lock(lock);
|
||||
@@ -8179,9 +8183,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
|
||||
|
||||
if (rwlock_needbreak(lock) || resched) {
|
||||
write_unlock(lock);
|
||||
if (resched)
|
||||
preempt_schedule_common();
|
||||
else
|
||||
if (!_cond_resched())
|
||||
cpu_relax();
|
||||
ret = 1;
|
||||
write_lock(lock);
|
||||
|
||||
@@ -277,7 +277,7 @@ void __sched_core_account_forceidle(struct rq *rq)
|
||||
rq_i = cpu_rq(i);
|
||||
p = rq_i->core_pick ?: rq_i->curr;
|
||||
|
||||
if (!p->core_cookie)
|
||||
if (p == rq_i->idle)
|
||||
continue;
|
||||
|
||||
__schedstat_add(p->stats.core_forceidle_sum, delta);
|
||||
|
||||
@@ -3049,9 +3049,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
static inline void
|
||||
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
u32 divider = get_pelt_divider(&se->avg);
|
||||
sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
|
||||
cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
|
||||
sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
|
||||
/* See update_cfs_rq_load_avg() */
|
||||
cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
|
||||
cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
@@ -3402,7 +3404,6 @@ void set_task_rq_fair(struct sched_entity *se,
|
||||
se->avg.last_update_time = n_last_update_time;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
|
||||
* propagate its contribution. The key to this propagation is the invariant
|
||||
@@ -3470,15 +3471,14 @@ void set_task_rq_fair(struct sched_entity *se,
|
||||
* XXX: only do this for the part of runnable > running ?
|
||||
*
|
||||
*/
|
||||
|
||||
static inline void
|
||||
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
|
||||
{
|
||||
long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
|
||||
u32 divider;
|
||||
long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
|
||||
u32 new_sum, divider;
|
||||
|
||||
/* Nothing to update */
|
||||
if (!delta)
|
||||
if (!delta_avg)
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -3487,23 +3487,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
|
||||
*/
|
||||
divider = get_pelt_divider(&cfs_rq->avg);
|
||||
|
||||
|
||||
/* Set new sched_entity's utilization */
|
||||
se->avg.util_avg = gcfs_rq->avg.util_avg;
|
||||
se->avg.util_sum = se->avg.util_avg * divider;
|
||||
new_sum = se->avg.util_avg * divider;
|
||||
delta_sum = (long)new_sum - (long)se->avg.util_sum;
|
||||
se->avg.util_sum = new_sum;
|
||||
|
||||
/* Update parent cfs_rq utilization */
|
||||
add_positive(&cfs_rq->avg.util_avg, delta);
|
||||
cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
|
||||
add_positive(&cfs_rq->avg.util_avg, delta_avg);
|
||||
add_positive(&cfs_rq->avg.util_sum, delta_sum);
|
||||
|
||||
/* See update_cfs_rq_load_avg() */
|
||||
cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
|
||||
cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
|
||||
{
|
||||
long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
|
||||
u32 divider;
|
||||
long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
|
||||
u32 new_sum, divider;
|
||||
|
||||
/* Nothing to update */
|
||||
if (!delta)
|
||||
if (!delta_avg)
return;

/*

@@ -3514,19 +3521,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf

/* Set new sched_entity's runnable */
se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
se->avg.runnable_sum = se->avg.runnable_avg * divider;
new_sum = se->avg.runnable_avg * divider;
delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
se->avg.runnable_sum = new_sum;

/* Update parent cfs_rq runnable */
add_positive(&cfs_rq->avg.runnable_avg, delta);
cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
/* See update_cfs_rq_load_avg() */
cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
}

static inline void
update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
unsigned long load_avg;
u64 load_sum = 0;
s64 delta_sum;
u32 divider;

if (!runnable_sum)

@@ -3553,7 +3566,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
* assuming all tasks are equally runnable.
*/
if (scale_load_down(gcfs_rq->load.weight)) {
load_sum = div_s64(gcfs_rq->avg.load_sum,
load_sum = div_u64(gcfs_rq->avg.load_sum,
scale_load_down(gcfs_rq->load.weight));
}

@@ -3570,19 +3583,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
runnable_sum = max(runnable_sum, running_sum);

load_sum = (s64)se_weight(se) * runnable_sum;
load_avg = div_s64(load_sum, divider);
load_sum = se_weight(se) * runnable_sum;
load_avg = div_u64(load_sum, divider);

se->avg.load_sum = runnable_sum;

delta = load_avg - se->avg.load_avg;
if (!delta)
delta_avg = load_avg - se->avg.load_avg;
if (!delta_avg)
return;

se->avg.load_avg = load_avg;
delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;

add_positive(&cfs_rq->avg.load_avg, delta);
cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
se->avg.load_sum = runnable_sum;
se->avg.load_avg = load_avg;
add_positive(&cfs_rq->avg.load_avg, delta_avg);
add_positive(&cfs_rq->avg.load_sum, delta_sum);
/* See update_cfs_rq_load_avg() */
cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}

static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)

@@ -3673,7 +3689,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
*
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
*
* Returns true if the load decayed or we removed load.
* Return: true if the load decayed or we removed load.
*
* Since both these conditions indicate a changed cfs_rq->avg.load we should
* call update_tg_load_avg() when this function returns true.

@@ -3698,15 +3714,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)

r = removed_load;
sub_positive(&sa->load_avg, r);
sa->load_sum = sa->load_avg * divider;
sub_positive(&sa->load_sum, r * divider);
/* See sa->util_sum below */
sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);

r = removed_util;
sub_positive(&sa->util_avg, r);
sa->util_sum = sa->util_avg * divider;
sub_positive(&sa->util_sum, r * divider);
/*
* Because of rounding, se->util_sum might ends up being +1 more than
* cfs->util_sum. Although this is not a problem by itself, detaching
* a lot of tasks with the rounding problem between 2 updates of
* util_avg (~1ms) can make cfs->util_sum becoming null whereas
* cfs_util_avg is not.
* Check that util_sum is still above its lower bound for the new
* util_avg. Given that period_contrib might have moved since the last
* sync, we are only sure that util_sum must be above or equal to
* util_avg * minimum possible divider
*/
sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);

r = removed_runnable;
sub_positive(&sa->runnable_avg, r);
sa->runnable_sum = sa->runnable_avg * divider;
sub_positive(&sa->runnable_sum, r * divider);
/* See sa->util_sum above */
sa->runnable_sum = max_t(u32, sa->runnable_sum,
sa->runnable_avg * PELT_MIN_DIVIDER);

/*
* removed_runnable is the unweighted version of removed_load so we

@@ -3793,17 +3826,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
*/
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
* See ___update_load_avg() for details.
*/
u32 divider = get_pelt_divider(&cfs_rq->avg);

dequeue_load_avg(cfs_rq, se);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
/* See update_cfs_rq_load_avg() */
cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);

sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
/* See update_cfs_rq_load_avg() */
cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);

add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);

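All of the max_t(u32, ..., ... * PELT_MIN_DIVIDER) lines added above enforce one invariant, spelled out in the update_cfs_rq_load_avg() comment: after a contribution has been subtracted, a PELT *_sum must never fall below its *_avg multiplied by the smallest divider ___update_load_avg() can ever use, otherwise rounding can drive the sum to zero while the average is still non-zero. A minimal userspace sketch of that clamp, with sub_positive() open-coded and a simplified struct standing in for struct sched_avg (LOAD_AVG_MAX uses the kernel's constant; everything else is illustrative):

#include <assert.h>
#include <stdio.h>

#define LOAD_AVG_MAX     47742			/* kernel's PELT series maximum */
#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)

struct avg {
	unsigned long util_avg;
	unsigned int  util_sum;
};

/* Remove a detached entity's contribution, then re-establish the lower bound. */
static void remove_util(struct avg *sa, unsigned long r_avg, unsigned int r_sum)
{
	/* open-coded sub_positive(): subtract but never go below zero */
	sa->util_avg -= (r_avg < sa->util_avg) ? r_avg : sa->util_avg;
	sa->util_sum -= (r_sum < sa->util_sum) ? r_sum : sa->util_sum;

	/* rounding may have left util_sum too small for the remaining util_avg */
	if (sa->util_sum < sa->util_avg * PELT_MIN_DIVIDER)
		sa->util_sum = sa->util_avg * PELT_MIN_DIVIDER;
}

int main(void)
{
	struct avg sa = { .util_avg = 10, .util_sum = 10 * PELT_MIN_DIVIDER + 5 };

	/* remove a contribution whose _sum is slightly "too large" for its _avg */
	remove_util(&sa, 3, 3 * PELT_MIN_DIVIDER + 40);
	assert(sa.util_sum >= sa.util_avg * PELT_MIN_DIVIDER);
	printf("util_avg=%lu util_sum=%u\n", sa.util_avg, sa.util_sum);
	return 0;
}
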
@@ -8560,6 +8594,8 @@ group_type group_classify(unsigned int imbalance_pct,
*
* If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
* of @dst_cpu are idle and @sg has lower priority.
*
* Return: true if @dst_cpu can pull tasks, false otherwise.
*/
static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sg_lb_stats *sgs,

@@ -8635,6 +8671,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
* @sds: Load-balancing data with statistics of the local group.
* @group: sched_group whose statistics are to be updated.
* @sgs: variable to hold the statistics for this group.
* @sg_status: Holds flag indicating the status of the sched_group

@@ -9443,12 +9480,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/**
* find_busiest_group - Returns the busiest group within the sched_domain
* if there is an imbalance.
* @env: The load balancing environment.
*
* Also calculates the amount of runnable load which should be moved
* to restore balance.
*
* @env: The load balancing environment.
*
* Return: - The busiest group if imbalance exists.
*/
static struct sched_group *find_busiest_group(struct lb_env *env)

@@ -147,11 +147,11 @@
#endif

#ifdef CONFIG_RSEQ
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \
#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
#endif

#define MEMBARRIER_CMD_BITMASK \

@@ -159,7 +159,8 @@
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
| MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)

static void ipi_mb(void *info)
{

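MEMBARRIER_CMD_BITMASK is what MEMBARRIER_CMD_QUERY reports back to userspace as the set of supported commands, so a bit missing from it makes a command look unavailable even though the kernel implements it. A simplified, self-contained sketch of that query pattern (the enum values and helper below are illustrative stand-ins, not the kernel's membarrier implementation):

#include <stdio.h>

enum cmd {
	CMD_QUERY                           = 0,
	CMD_PRIVATE_EXPEDITED               = 1 << 0,
	CMD_PRIVATE_EXPEDITED_RSEQ          = 1 << 1,
	CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = 1 << 2,
};

/* The query answer: forgetting a bit here hides a working command. */
#define CMD_BITMASK (CMD_PRIVATE_EXPEDITED \
		     | CMD_PRIVATE_EXPEDITED_RSEQ \
		     | CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)

static int query(void)
{
	return CMD_BITMASK;	/* what a CMD_QUERY handler would hand back */
}

int main(void)
{
	int supported = query();

	printf("rseq fence supported: %s\n",
	       supported & CMD_PRIVATE_EXPEDITED_RSEQ ? "yes" : "no");
	return 0;
}
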
@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
}
#endif

#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)

static inline u32 get_pelt_divider(struct sched_avg *avg)
{
return LOAD_AVG_MAX - 1024 + avg->period_contrib;
return PELT_MIN_DIVIDER + avg->period_contrib;
}

static inline void cfs_se_util_change(struct sched_avg *avg)

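One relation worth spelling out, assuming the usual PELT bookkeeping: avg->period_contrib only ever holds the not-yet-complete part of the current 1024us segment, so 0 <= period_contrib < 1024 and get_pelt_divider() can never return less than LOAD_AVG_MAX - 1024. Naming that floor PELT_MIN_DIVIDER is what lets the fair.c hunks above use *_avg * PELT_MIN_DIVIDER as a safe lower bound for *_sum, since divider = PELT_MIN_DIVIDER + period_contrib >= PELT_MIN_DIVIDER.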
@@ -1137,7 +1137,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
kref_init(&t->refcount);
t->pending_event = false;

mutex_lock(&group->trigger_lock);

@@ -1167,15 +1166,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
return t;
}

static void psi_trigger_destroy(struct kref *ref)
void psi_trigger_destroy(struct psi_trigger *t)
{
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
struct psi_group *group = t->group;
struct psi_group *group;
struct task_struct *task_to_destroy = NULL;

if (static_branch_likely(&psi_disabled))
/*
* We do not check psi_disabled since it might have been disabled after
* the trigger got created.
*/
if (!t)
return;

group = t->group;
/*
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
* from under a polling process.

@@ -1211,9 +1214,9 @@ static void psi_trigger_destroy(struct kref *ref)
mutex_unlock(&group->trigger_lock);

/*
* Wait for both *trigger_ptr from psi_trigger_replace and
* poll_task RCUs to complete their read-side critical sections
* before destroying the trigger and optionally the poll_task
* Wait for psi_schedule_poll_work RCU to complete its read-side
* critical section before destroying the trigger and optionally the
* poll_task.
*/
synchronize_rcu();
/*

@@ -1230,18 +1233,6 @@ static void psi_trigger_destroy(struct kref *ref)
kfree(t);
}

void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
{
struct psi_trigger *old = *trigger_ptr;

if (static_branch_likely(&psi_disabled))
return;

rcu_assign_pointer(*trigger_ptr, new);
if (old)
kref_put(&old->refcount, psi_trigger_destroy);
}

__poll_t psi_trigger_poll(void **trigger_ptr,
struct file *file, poll_table *wait)
{

@@ -1251,24 +1242,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (static_branch_likely(&psi_disabled))
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;

rcu_read_lock();

t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
if (!t) {
rcu_read_unlock();
t = smp_load_acquire(trigger_ptr);
if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
}
kref_get(&t->refcount);

rcu_read_unlock();

poll_wait(file, &t->event_wait, wait);

if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;

kref_put(&t->refcount, psi_trigger_destroy);

return ret;
}

@@ -1331,14 +1313,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,

buf[buf_size - 1] = '\0';

new = psi_trigger_create(&psi_system, buf, nbytes, res);
if (IS_ERR(new))
return PTR_ERR(new);

seq = file->private_data;

/* Take seq->lock to protect seq->private from concurrent writes */
mutex_lock(&seq->lock);
psi_trigger_replace(&seq->private, new);

/* Allow only one trigger per file descriptor */
if (seq->private) {
mutex_unlock(&seq->lock);
return -EBUSY;
}

new = psi_trigger_create(&psi_system, buf, nbytes, res);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
}

smp_store_release(&seq->private, new);
mutex_unlock(&seq->lock);

return nbytes;

@@ -1373,7 +1365,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;

psi_trigger_replace(&seq->private, NULL);
psi_trigger_destroy(seq->private);
return single_release(inode, file);
}

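The psi_write()/psi_trigger_poll() hunks above replace the kref + RCU scheme with a simpler lifetime rule for the procfs interface: at most one trigger per file descriptor, published under seq->lock with smp_store_release(), read with smp_load_acquire(), and only torn down in psi_fop_release(). A userspace sketch of that publish/consume pairing, with C11 atomics standing in for the kernel primitives and purely illustrative names (struct trigger, publish(), the slot variable):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct trigger {
	int threshold;
	int window_us;
};

static _Atomic(struct trigger *) slot;	/* plays the role of seq->private */

static void publish(struct trigger *t)
{
	/* Release: all initialisation above becomes visible before the pointer. */
	atomic_store_explicit(&slot, t, memory_order_release);
}

static void *reader(void *arg)
{
	/* Acquire pairs with the release store; NULL means "no trigger yet". */
	struct trigger *t = atomic_load_explicit(&slot, memory_order_acquire);

	if (t)
		printf("saw trigger: %d over %dus\n", t->threshold, t->window_us);
	else
		printf("no trigger installed\n");
	return NULL;
}

int main(void)
{
	pthread_t tid;
	struct trigger *t = malloc(sizeof(*t));

	t->threshold = 150000;
	t->window_us = 1000000;
	publish(t);

	pthread_create(&tid, NULL, reader, NULL);
	pthread_join(tid, NULL);
	free(t);
	return 0;
}
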
@@ -29,6 +29,9 @@
#include <linux/syscalls.h>
#include <linux/sysctl.h>

/* Not exposed in headers: strictly internal use only. */
#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)

#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif

@@ -1010,6 +1013,7 @@ static void __secure_computing_strict(int this_syscall)
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
current->seccomp.mode = SECCOMP_MODE_DEAD;
seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
do_exit(SIGKILL);
}

@@ -1261,6 +1265,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
case SECCOMP_RET_KILL_THREAD:
case SECCOMP_RET_KILL_PROCESS:
default:
current->seccomp.mode = SECCOMP_MODE_DEAD;
seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
if (action != SECCOMP_RET_KILL_THREAD ||

@@ -1309,6 +1314,11 @@ int __secure_computing(const struct seccomp_data *sd)
return 0;
case SECCOMP_MODE_FILTER:
return __seccomp_filter(this_syscall, sd, false);
/* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
case SECCOMP_MODE_DEAD:
WARN_ON_ONCE(1);
do_exit(SIGKILL);
return -1;
default:
BUG();
}

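SECCOMP_MODE_DEAD closes a theoretical hole: if the fatal signal sent for a killing filter action ever failed to terminate the task, the task must still never run another syscall. Marking the task dead before delivering the kill, and treating that state as "WARN and kill again" on re-entry, makes survival impossible. A compressed userspace sketch of the same state-machine idea (the modes, die() and the syscall hook are simplified assumptions, not the real seccomp internals):

#include <stdio.h>
#include <stdlib.h>

enum mode { MODE_DISABLED, MODE_FILTER, MODE_DEAD };

static void die(const char *why)
{
	fprintf(stderr, "fatal: %s\n", why);
	exit(1);
}

/* Returns 0 if the syscall may run; never returns for killed or dead tasks. */
static int secure_computing(enum mode *mode, int denied)
{
	switch (*mode) {
	case MODE_FILTER:
		if (denied) {
			*mode = MODE_DEAD;	/* mark first ... */
			die("denied syscall");	/* ... then deliver the kill */
		}
		return 0;
	case MODE_DEAD:
		/* Surviving the kill above must be proactively impossible. */
		fprintf(stderr, "WARN: task survived seccomp kill\n");
		die("dead task re-entered the syscall path");
		return -1;
	default:
		return 0;
	}
}

int main(void)
{
	enum mode m = MODE_FILTER;

	printf("allowed syscall -> %d\n", secure_computing(&m, 0));
	return 0;
}
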
@@ -1342,9 +1342,10 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
}
/*
* Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
* debugging to leave init killable.
* debugging to leave init killable. But HANDLER_EXIT is always fatal.
*/
if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
if (action->sa.sa_handler == SIG_DFL &&
(!t->ptrace || (handler == HANDLER_EXIT)))
t->signal->flags &= ~SIGNAL_UNKILLABLE;
ret = send_signal(sig, info, t, PIDTYPE_PID);
spin_unlock_irqrestore(&t->sighand->siglock, flags);

@@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
#define skip_erasing() false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */

asmlinkage void notrace stackleak_erase(void)
asmlinkage void noinstr stackleak_erase(void)
{
/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
unsigned long kstack_ptr = current->lowest_stack;

@@ -124,9 +124,8 @@ asmlinkage void notrace stackleak_erase(void)
/* Reset the 'lowest_stack' value for the next syscall */
current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}
NOKPROBE_SYMBOL(stackleak_erase);

void __used __no_caller_saved_registers notrace stackleak_track_stack(void)
void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
{
unsigned long sp = current_stack_pointer;

kernel/sys.c

@@ -472,6 +472,16 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;

free_uid(new->user);
new->user = new_user;
return 0;
}

static void flag_nproc_exceeded(struct cred *new)
{
if (new->ucounts == current_ucounts())
return;

/*
* We don't fail in case of NPROC limit excess here because too many
* poorly written programs don't check set*uid() return code, assuming

@@ -480,15 +490,10 @@ static int set_user(struct cred *new)
* failure to the execve() stage.
*/
if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
new_user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
new->user != INIT_USER)
current->flags |= PF_NPROC_EXCEEDED;
else
current->flags &= ~PF_NPROC_EXCEEDED;

free_uid(new->user);
new->user = new_user;
return 0;
}

/*

@@ -563,6 +568,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
if (retval < 0)
goto error;

flag_nproc_exceeded(new);
return commit_creds(new);

error:

@@ -625,6 +631,7 @@ long __sys_setuid(uid_t uid)
if (retval < 0)
goto error;

flag_nproc_exceeded(new);
return commit_creds(new);

error:

@@ -704,6 +711,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (retval < 0)
goto error;

flag_nproc_exceeded(new);
return commit_creds(new);

error:

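The net effect of the kernel/sys.c hunks is an ordering change: set_user() no longer second-guesses RLIMIT_NPROC while the new credentials are still being assembled; instead flag_nproc_exceeded() evaluates the limit against the finished credentials immediately before commit_creds(), and skips callers whose ucounts did not actually change. A very rough sketch of that shape, with invented types and helpers purely for illustration:

#include <stdbool.h>
#include <stdio.h>

struct cred { long nproc; long rlimit_nproc; bool nproc_exceeded; };

static void flag_nproc_exceeded(struct cred *new)
{
	/* In the kernel: return early when new->ucounts == current_ucounts(). */
	new->nproc_exceeded = new->nproc > new->rlimit_nproc;
}

static int commit_creds(struct cred *new)
{
	printf("committing creds, PF_NPROC_EXCEEDED=%d\n", new->nproc_exceeded);
	return 0;
}

static int setuid_like(struct cred *new)
{
	/* 1. build "new" completely (uid switch, ucounts move, LSM checks) */
	/* 2. only then decide whether the process limit is exceeded ...    */
	flag_nproc_exceeded(new);
	/* 3. ... and commit                                                */
	return commit_creds(new);
}

int main(void)
{
	struct cred new = { .nproc = 10, .rlimit_nproc = 4 };

	return setuid_like(&new);
}
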
@@ -70,10 +70,16 @@ config HAVE_C_RECORDMCOUNT
help
C version of recordmcount available?

config HAVE_BUILDTIME_MCOUNT_SORT
bool
help
An architecture selects this if it sorts the mcount_loc section
at build time.

config BUILDTIME_MCOUNT_SORT
bool
default y
depends on BUILDTIME_TABLE_SORT && !S390
depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE
help
Sort the mcount_loc section at build time.

@@ -252,6 +252,10 @@ __setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
/* Ignore the "tp_printk_stop_on_boot" param */
if (*str == '_')
return 0;

if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
tracepoint_printk = 1;
return 1;

@@ -7740,7 +7744,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
err = kzalloc(sizeof(*err), GFP_KERNEL);
if (!err)
err = ERR_PTR(-ENOMEM);
tr->n_err_log_entries++;
else
tr->n_err_log_entries++;

return err;
}

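The get_tracing_log_err() change follows the usual allocate-or-ERR_PTR pattern: only count a new entry when kzalloc() actually succeeded, and hand the caller an encoded error pointer otherwise so it can tell "out of memory" apart from a valid entry. A standalone sketch of that pattern with toy ERR_PTR()/IS_ERR() helpers standing in for the real ones from include/linux/err.h:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy versions of the kernel's ERR_PTR()/IS_ERR() helpers. */
#define ERR_PTR(err)	((void *)(intptr_t)(err))
#define IS_ERR(ptr)	((uintptr_t)(ptr) >= (uintptr_t)-4095)

struct log_err { char msg[64]; };

static int n_entries;

static struct log_err *get_log_err(void)
{
	struct log_err *err = calloc(1, sizeof(*err));

	if (!err)
		err = ERR_PTR(-ENOMEM);	/* report the failure ...          */
	else
		n_entries++;		/* ... and only count real entries */

	return err;
}

int main(void)
{
	struct log_err *e = get_log_err();

	printf("entries=%d, IS_ERR=%d\n", n_entries, (int)IS_ERR(e));
	if (!IS_ERR(e))
		free(e);
	return 0;
}
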
@@ -2503,6 +2503,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
expr->fn = hist_field_unary_minus;
expr->operands[0] = operand1;
expr->size = operand1->size;
expr->is_signed = operand1->is_signed;
expr->operator = FIELD_OP_UNARY_MINUS;
expr->name = expr_str(expr, 0);
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);

@@ -2719,6 +2721,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,

/* The operand sizes should be the same, so just pick one */
expr->size = operand1->size;
expr->is_signed = operand1->is_signed;

expr->operator = field_op;
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);

@@ -3935,6 +3938,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data,

var_ref_idx = find_var_ref_idx(hist_data, var_ref);
if (WARN_ON(var_ref_idx < 0)) {
kfree(p);
ret = var_ref_idx;
goto err;
}

@@ -6163,7 +6167,9 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,

lockdep_assert_held(&event_mutex);

if (glob && strlen(glob)) {
WARN_ON(!glob);

if (strlen(glob)) {
hist_err_clear();
last_cmd_set(file, param);
}

@@ -6196,7 +6202,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
continue;
}
break;
} while (p);
} while (1);

if (!p)
param = NULL;

@@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
kfree(new);
} else {
hlist_add_head(&new->node, hashent);
get_user_ns(new->ns);
spin_unlock_irq(&ucounts_lock);
return new;
}

@@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts)
if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
hlist_del_init(&ucounts->node);
spin_unlock_irqrestore(&ucounts_lock, flags);
put_user_ns(ucounts->ns);
kfree(ucounts);
}
}

@@ -348,7 +350,8 @@ bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsign
if (rlimit > LONG_MAX)
max = LONG_MAX;
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
if (get_ucounts_value(iter, type) > max)
long val = get_ucounts_value(iter, type);
if (val < 0 || val > max)
return true;
max = READ_ONCE(iter->ns->ucount_max[type]);
}

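The is_ucounts_overlimit() hunk reads each level's counter once into a signed local so that a negative value, which can occur transiently when a count wraps, is treated as over the limit instead of slipping past the comparison. A simplified sketch of that walk (the struct, field names and hierarchy are stand-ins, not the kernel's ucounts code):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct level { long count; long max; struct level *parent; };

static bool is_over_limit(struct level *l, unsigned long rlimit)
{
	long max = rlimit > LONG_MAX ? LONG_MAX : (long)rlimit;

	for (; l; l = l->parent) {
		long val = l->count;		/* read once */

		if (val < 0 || val > max)	/* negative counts as over */
			return true;
		max = l->max;
	}
	return false;
}

int main(void)
{
	struct level root = { .count = -1, .max = 100, .parent = NULL };

	printf("over limit: %d\n", is_over_limit(&root, 50));
	return 0;
}
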