Merge branch 'sched/urgent' into sched/core to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
{
	switch (type) {
	case BPF_TYPE_PROG:
		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
		raw = bpf_prog_inc(raw);
		break;
	case BPF_TYPE_MAP:
		bpf_map_inc(raw, true);
		raw = bpf_map_inc(raw, true);
		break;
	default:
		WARN_ON_ONCE(1);
@@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname,
		goto out;

	raw = bpf_any_get(inode->i_private, *type);
	touch_atime(&path);
	if (!IS_ERR(raw))
		touch_atime(&path);

	path_put(&path);
	return raw;

@@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f)
	return f.file->private_data;
}

void bpf_map_inc(struct bpf_map *map, bool uref)
/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	atomic_inc(&map->refcnt);
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
	if (IS_ERR(map))
		return map;

	bpf_map_inc(map, true);
	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
@@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
	return f.file->private_data;
}

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}

/* called by sockets/tracing/seccomp before attaching program to an event
 * pairs with bpf_prog_put()
 */
@@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
	if (IS_ERR(prog))
		return prog;

	atomic_inc(&prog->aux->refcnt);
	prog = bpf_prog_inc(prog);
	fdput(f);

	return prog;

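The syscall.c hunks above turn bpf_map_inc() and the new bpf_prog_inc() into operations that can fail: once a refcount reaches BPF_MAX_REFCNT the increment is undone and ERR_PTR(-EBUSY) is returned, so user space can no longer overflow the 32-bit counter by acquiring references in a loop. A minimal user-space sketch of the same overflow-guarded pattern, using C11 atomics and an illustrative OBJ_MAX_REFCNT cap rather than the kernel API:

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

#define OBJ_MAX_REFCNT 32768	/* illustrative cap, mirrors BPF_MAX_REFCNT */

struct obj {
	atomic_int refcnt;
};

/* Take a reference, but refuse once the cap is reached so the
 * counter can never wrap back to zero while users still exist. */
static int obj_get(struct obj *o)
{
	if (atomic_fetch_add(&o->refcnt, 1) + 1 > OBJ_MAX_REFCNT) {
		atomic_fetch_sub(&o->refcnt, 1);	/* undo the bump */
		return -EBUSY;
	}
	return 0;
}

int main(void)
{
	struct obj o = { .refcnt = 1 };

	printf("first get: %d\n", obj_get(&o));		/* 0: success */
	atomic_store(&o.refcnt, OBJ_MAX_REFCNT);
	printf("at the cap: %d\n", obj_get(&o));	/* -EBUSY: rejected */
	return 0;
}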
@@ -239,16 +239,6 @@ static const char * const reg_type_str[] = {
	[CONST_IMM] = "imm",
};

static const struct {
	int map_type;
	int func_id;
} func_limit[] = {
	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
	{BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
};

static void print_verifier_state(struct verifier_env *env)
{
	enum bpf_reg_type t;
@@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 regno,

static int check_map_func_compatibility(struct bpf_map *map, int func_id)
{
	bool bool_map, bool_func;
	int i;

	if (!map)
		return 0;

	for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
		bool_map = (map->map_type == func_limit[i].map_type);
		bool_func = (func_id == func_limit[i].func_id);
		/* only when map & func pair match it can continue.
		 * don't allow any other map type to be passed into
		 * the special func;
		 */
		if (bool_func && bool_map != bool_func) {
			verbose("cannot pass map_type %d into func %d\n",
				map->map_type, func_id);
			return -EINVAL;
		}
	/* We need a two way check, first is from map perspective ... */
	switch (map->map_type) {
	case BPF_MAP_TYPE_PROG_ARRAY:
		if (func_id != BPF_FUNC_tail_call)
			goto error;
		break;
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
		if (func_id != BPF_FUNC_perf_event_read &&
		    func_id != BPF_FUNC_perf_event_output)
			goto error;
		break;
	case BPF_MAP_TYPE_STACK_TRACE:
		if (func_id != BPF_FUNC_get_stackid)
			goto error;
		break;
	default:
		break;
	}

	/* ... and second from the function itself. */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			goto error;
		break;
	case BPF_FUNC_perf_event_read:
	case BPF_FUNC_perf_event_output:
		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
			goto error;
		break;
	case BPF_FUNC_get_stackid:
		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
			goto error;
		break;
	default:
		break;
	}

	return 0;
error:
	verbose("cannot pass map_type %d into func %d\n",
		map->map_type, func_id);
	return -EINVAL;
}

static int check_call(struct verifier_env *env, int func_id)
@@ -2030,7 +2045,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
			if (IS_ERR(map)) {
				verbose("fd %d is not pointing to valid bpf_map\n",
					insn->imm);
				fdput(f);
				return PTR_ERR(map);
			}

@@ -2050,15 +2064,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
				return -E2BIG;
			}

			/* remember this map */
			env->used_maps[env->used_map_cnt++] = map;

			/* hold the map. If the program is rejected by verifier,
			 * the map will be released by release_maps() or it
			 * will be used by the valid program until it's unloaded
			 * and all maps are released in free_bpf_prog_info()
			 */
			bpf_map_inc(map, false);
			map = bpf_map_inc(map, false);
			if (IS_ERR(map)) {
				fdput(f);
				return PTR_ERR(map);
			}
			env->used_maps[env->used_map_cnt++] = map;

			fdput(f);
next_insn:
		insn++;

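The verifier hunk replaces the one-way func_limit[] table with two switch statements, so the restriction is enforced from both directions: a special-purpose map only accepts its dedicated helpers, and a dedicated helper only accepts its map type. A standalone sketch of that two-way shape, using illustrative enum values rather than the real BPF identifiers:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's map types and helper IDs. */
enum map_type { MAP_HASH, MAP_PROG_ARRAY, MAP_STACK_TRACE };
enum func_id  { FUNC_LOOKUP, FUNC_TAIL_CALL, FUNC_GET_STACKID };

/* Two-way check: first from the map's perspective, then from the
 * helper's perspective, so neither direction can be bypassed. */
static int map_func_compatible(enum map_type map, enum func_id func)
{
	switch (map) {			/* map -> allowed helpers */
	case MAP_PROG_ARRAY:
		if (func != FUNC_TAIL_CALL)
			return 0;
		break;
	case MAP_STACK_TRACE:
		if (func != FUNC_GET_STACKID)
			return 0;
		break;
	default:
		break;
	}

	switch (func) {			/* helper -> allowed maps */
	case FUNC_TAIL_CALL:
		if (map != MAP_PROG_ARRAY)
			return 0;
		break;
	case FUNC_GET_STACKID:
		if (map != MAP_STACK_TRACE)
			return 0;
		break;
	default:
		break;
	}
	return 1;
}

int main(void)
{
	printf("%d\n", map_func_compatible(MAP_HASH, FUNC_LOOKUP));		/* 1 */
	printf("%d\n", map_func_compatible(MAP_HASH, FUNC_TAIL_CALL));		/* 0 */
	printf("%d\n", map_func_compatible(MAP_PROG_ARRAY, FUNC_LOOKUP));	/* 0 */
	return 0;
}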
@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
				    size_t nbytes, loff_t off, bool threadgroup)
{
	struct task_struct *tsk;
	struct cgroup_subsys *ss;
	struct cgroup *cgrp;
	pid_t pid;
	int ret;
	int ssid, ret;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return -EINVAL;
@@ -2875,8 +2876,10 @@ out_unlock_rcu:
	rcu_read_unlock();
out_unlock_threadgroup:
	percpu_up_write(&cgroup_threadgroup_rwsem);
	for_each_subsys(ss, ssid)
		if (ss->post_attach)
			ss->post_attach();
	cgroup_kn_unlock(of->kn);
	cpuset_post_attach_flush();
	return ret ?: nbytes;
}

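The cgroup.c hunk stops hard-coding cpuset_post_attach_flush() and instead walks every subsystem, invoking an optional ->post_attach() callback when one is registered. A small sketch of that optional-callback iteration, with made-up subsystem names and types (not the cgroup API):

#include <stdio.h>

/* Illustrative subsystem descriptor; only the callback matters here. */
struct subsys {
	const char *name;
	void (*post_attach)(void);	/* optional, may be NULL */
};

static void cpuset_like_post_attach(void)
{
	printf("flushing deferred migration work\n");
}

static struct subsys subsystems[] = {
	{ .name = "cpu" },					/* no callback */
	{ .name = "cpuset", .post_attach = cpuset_like_post_attach },
};

/* Call every registered post-attach hook instead of hard-coding one. */
static void run_post_attach(void)
{
	for (unsigned i = 0; i < sizeof(subsystems) / sizeof(subsystems[0]); i++)
		if (subsystems[i].post_attach)
			subsystems[i].post_attach();
}

int main(void)
{
	run_post_attach();	/* prints once, for the cpuset-like subsystem */
	return 0;
}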
@@ -58,7 +58,6 @@
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/wait.h>

@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
	}
}

void cpuset_post_attach_flush(void)
static void cpuset_post_attach(void)
{
	flush_workqueue(cpuset_migrate_mm_wq);
}
@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
	.can_attach = cpuset_can_attach,
	.cancel_attach = cpuset_cancel_attach,
	.attach = cpuset_attach,
	.post_attach = cpuset_post_attach,
	.bind = cpuset_bind,
	.legacy_cftypes = files,
	.early_init = true,

@@ -351,7 +351,7 @@ static struct srcu_struct pmus_srcu;
 * 1 - disallow cpu events for unpriv
 * 2 - disallow kernel profiling for unpriv
 */
int sysctl_perf_event_paranoid __read_mostly = 1;
int sysctl_perf_event_paranoid __read_mostly = 2;

/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -412,7 +412,8 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
	if (ret || !write)
		return ret;

	if (sysctl_perf_cpu_time_max_percent == 100) {
	if (sysctl_perf_cpu_time_max_percent == 100 ||
	    sysctl_perf_cpu_time_max_percent == 0) {
		printk(KERN_WARNING
		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
		WRITE_ONCE(perf_sample_allowed_ns, 0);
@@ -1105,6 +1106,7 @@ static void put_ctx(struct perf_event_context *ctx)
 * function.
 *
 * Lock order:
 *   cred_guard_mutex
 *   task_struct::perf_event_mutex
 *   perf_event_context::mutex
 *   perf_event::child_mutex;
@@ -3420,7 +3422,6 @@ static struct task_struct *
find_lively_task_by_vpid(pid_t vpid)
{
	struct task_struct *task;
	int err;

	rcu_read_lock();
	if (!vpid)
@@ -3434,16 +3435,7 @@ find_lively_task_by_vpid(pid_t vpid)
	if (!task)
		return ERR_PTR(-ESRCH);

	/* Reuse ptrace permission checks for now. */
	err = -EACCES;
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
		goto errout;

	return task;
errout:
	put_task_struct(task);
	return ERR_PTR(err);

}

/*
@@ -8413,6 +8405,24 @@ SYSCALL_DEFINE5(perf_event_open,

	get_online_cpus();

	if (task) {
		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
		if (err)
			goto err_cpus;

		/*
		 * Reuse ptrace permission checks for now.
		 *
		 * We must hold cred_guard_mutex across this and any potential
		 * perf_install_in_context() call for this new event to
		 * serialize against exec() altering our credentials (and the
		 * perf_event_exit_task() that could imply).
		 */
		err = -EACCES;
		if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
			goto err_cred;
	}

	if (flags & PERF_FLAG_PID_CGROUP)
		cgroup_fd = pid;

@@ -8420,7 +8430,7 @@ SYSCALL_DEFINE5(perf_event_open,
				 NULL, NULL, cgroup_fd);
	if (IS_ERR(event)) {
		err = PTR_ERR(event);
		goto err_cpus;
		goto err_cred;
	}

	if (is_sampling_event(event)) {
@@ -8479,11 +8489,6 @@ SYSCALL_DEFINE5(perf_event_open,
			goto err_context;
	}

	if (task) {
		put_task_struct(task);
		task = NULL;
	}

	/*
	 * Look up the group leader (we will attach this event to it):
	 */
@@ -8581,6 +8586,11 @@ SYSCALL_DEFINE5(perf_event_open,

	WARN_ON_ONCE(ctx->parent_ctx);

	/*
	 * This is the point on no return; we cannot fail hereafter. This is
	 * where we start modifying current state.
	 */

	if (move_group) {
		/*
		 * See perf_event_ctx_lock() for comments on the details
@@ -8652,6 +8662,11 @@ SYSCALL_DEFINE5(perf_event_open,
	mutex_unlock(&gctx->mutex);
	mutex_unlock(&ctx->mutex);

	if (task) {
		mutex_unlock(&task->signal->cred_guard_mutex);
		put_task_struct(task);
	}

	put_online_cpus();

	mutex_lock(&current->perf_event_mutex);
@@ -8684,6 +8699,9 @@ err_alloc:
	 */
	if (!event_file)
		free_event(event);
err_cred:
	if (task)
		mutex_unlock(&task->signal->cred_guard_mutex);
err_cpus:
	put_online_cpus();
err_task:
@@ -8968,6 +8986,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)

/*
 * When a child task exits, feed back event values to parent events.
 *
 * Can be called with cred_guard_mutex held when called from
 * install_exec_creds().
 */
void perf_event_exit_task(struct task_struct *child)
{

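Because perf_event_open() now takes task->signal->cred_guard_mutex early and holds it across event creation, every later failure has to unwind through the new err_cred label before err_cpus, releasing resources in the reverse order they were taken. A compact sketch of that acquire-in-order / release-in-reverse goto pattern, with illustrative helper names standing in for the perf internals:

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for the resources perf_event_open() juggles. */
static pthread_mutex_t cred_guard = PTHREAD_MUTEX_INITIALIZER;

static int get_cpus(void)  { return 0; }	/* pretend acquisition */
static void put_cpus(void) { }
static int create_event(int fail) { return fail ? -1 : 0; }

static int open_event(int fail_event)
{
	int err;

	err = get_cpus();
	if (err)
		goto out;

	err = pthread_mutex_lock(&cred_guard);
	if (err)
		goto err_cpus;

	err = create_event(fail_event);
	if (err)
		goto err_cred;		/* must drop the mutex, not just the cpus */

	pthread_mutex_unlock(&cred_guard);
	put_cpus();
	return 0;

err_cred:
	pthread_mutex_unlock(&cred_guard);
err_cpus:
	put_cpus();
out:
	return err;
}

int main(void)
{
	printf("ok path: %d\n", open_event(0));
	printf("error path: %d\n", open_event(1));
	return 0;
}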
@@ -1,5 +1,6 @@
#define pr_fmt(fmt) "kcov: " fmt

#define DISABLE_BRANCH_PROFILING
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/file.h>
@@ -43,7 +44,7 @@ struct kcov {
 * Entry point from instrumented code.
 * This is called once per basic-block/edge.
 */
void __sanitizer_cov_trace_pc(void)
void notrace __sanitizer_cov_trace_pc(void)
{
	struct task_struct *t;
	enum kcov_mode mode;

@@ -1415,6 +1415,9 @@ static int __init crash_save_vmcoreinfo_init(void)
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_dtor);
	VMCOREINFO_OFFSET(page, compound_order);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -1447,8 +1450,8 @@ static int __init crash_save_vmcoreinfo_init(void)
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLBFS
	VMCOREINFO_SYMBOL(free_huge_page);
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#endif

	arch_crash_save_vmcoreinfo();

@@ -2177,15 +2177,37 @@ cache_hit:
	chain->irq_context = hlock->irq_context;
	i = get_first_held_lock(curr, hlock);
	chain->depth = curr->lockdep_depth + 1 - i;

	BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks));
	BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks));
	BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));

	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
		chain->base = nr_chain_hlocks;
		nr_chain_hlocks += chain->depth;
		for (j = 0; j < chain->depth - 1; j++, i++) {
			int lock_id = curr->held_locks[i].class_idx - 1;
			chain_hlocks[chain->base + j] = lock_id;
		}
		chain_hlocks[chain->base + j] = class - lock_classes;
	}

	if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS)
		nr_chain_hlocks += chain->depth;

#ifdef CONFIG_DEBUG_LOCKDEP
	/*
	 * Important for check_no_collision().
	 */
	if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
		if (debug_locks_off_graph_unlock())
			return 0;

		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
		dump_stack();
		return 0;
	}
#endif

	hlist_add_head_rcu(&chain->entry, hash_head);
	debug_atomic_inc(chain_lookup_misses);
	inc_chains();
@@ -2933,6 +2955,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
	return 1;
}

static inline unsigned int task_irq_context(struct task_struct *task)
{
	return 2 * !!task->hardirq_context + !!task->softirq_context;
}

static int separate_irq_context(struct task_struct *curr,
		struct held_lock *hlock)
{
@@ -2941,8 +2968,6 @@ static int separate_irq_context(struct task_struct *curr,
	/*
	 * Keep track of points where we cross into an interrupt context:
	 */
	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
		curr->softirq_context;
	if (depth) {
		struct held_lock *prev_hlock;

@@ -2974,6 +2999,11 @@ static inline int mark_irqflags(struct task_struct *curr,
	return 1;
}

static inline unsigned int task_irq_context(struct task_struct *task)
{
	return 0;
}

static inline int separate_irq_context(struct task_struct *curr,
		struct held_lock *hlock)
{
@@ -3242,6 +3272,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
	hlock->acquire_ip = ip;
	hlock->instance = lock;
	hlock->nest_lock = nest_lock;
	hlock->irq_context = task_irq_context(curr);
	hlock->trylock = trylock;
	hlock->read = read;
	hlock->check = check;

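The new task_irq_context() helper collapses the hard-irq and soft-irq nesting state into a two-bit value (2 * !!hardirq_context + !!softirq_context) that is recorded in hlock->irq_context when a lock is acquired. The same encoding in isolation, as a standalone illustration:

#include <stdio.h>

/* Same 2-bit encoding as task_irq_context(): bit 1 = hardirq, bit 0 = softirq. */
static unsigned int irq_context(int hardirq_context, int softirq_context)
{
	return 2 * !!hardirq_context + !!softirq_context;
}

int main(void)
{
	printf("process context: %u\n", irq_context(0, 0));	/* 0 */
	printf("softirq:         %u\n", irq_context(0, 1));	/* 1 */
	printf("hardirq:         %u\n", irq_context(1, 0));	/* 2 */
	printf("hardirq+softirq: %u\n", irq_context(1, 7));	/* 3: counts are normalized */
	return 0;
}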
@@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v)
	int i;

	if (v == SEQ_START_TOKEN) {
		if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
			seq_printf(m, "(buggered) ");
		seq_printf(m, "all lock chains:\n");
		return 0;
	}

@@ -1395,6 +1395,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
				     !cpumask_test_cpu(later_rq->cpu,
						       &task->cpus_allowed) ||
				     task_running(rq, task) ||
				     !dl_task(task) ||
				     !task_on_rq_queued(task))) {
				double_unlock_balance(rq, later_rq);
				later_rq = NULL;

@@ -3099,7 +3099,14 @@ static int idle_balance(struct rq *this_rq);

#else /* CONFIG_SMP */

static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
static inline void update_load_avg(struct sched_entity *se, int not_used)
{
	struct cfs_rq *cfs_rq = cfs_rq_of(se);
	struct rq *rq = rq_of(cfs_rq);

	cpufreq_trigger_update(rq_clock(rq));
}

static inline void
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static inline void
@@ -3250,25 +3257,17 @@ static inline void check_schedstat_required(void)
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
	bool curr = cfs_rq->curr == se;

	/*
	 * If we're the current task, we must renormalise before calling
	 * update_curr().
	 * Update the normalized vruntime before updating min_vruntime
	 * through calling update_curr().
	 */
	if (renorm && curr)
	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
		se->vruntime += cfs_rq->min_vruntime;

	/*
	 * Update run-time statistics of the 'current'.
	 */
	update_curr(cfs_rq);

	/*
	 * Otherwise, renormalise after, such that we're placed at the current
	 * moment in time, instead of some random moment in the past.
	 */
	if (renorm && !curr)
		se->vruntime += cfs_rq->min_vruntime;

	enqueue_entity_load_avg(cfs_rq, se);
	account_entity_enqueue(cfs_rq, se);
	update_cfs_shares(cfs_rq);
@@ -3284,7 +3283,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
		update_stats_enqueue(cfs_rq, se);
		check_spread(cfs_rq, se);
	}
	if (!curr)
	if (se != cfs_rq->curr)
		__enqueue_entity(cfs_rq, se);
	se->on_rq = 1;


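The enqueue_entity() rework keeps the existing rule that a sleeping entity's vruntime is stored relative to cfs_rq->min_vruntime and is renormalized on enqueue, but it now renormalizes the current task before update_curr() moves min_vruntime and everyone else afterwards. A toy illustration of why the relative form matters across a dequeue/enqueue cycle, using plain doubles instead of the kernel's u64 nanoseconds:

#include <stdio.h>

/* Illustrative renormalization: a sleeping entity stores vruntime relative
 * to min_vruntime, and gets the current min_vruntime added back on enqueue. */
struct entity { double vruntime; };

static void dequeue(struct entity *se, double min_vruntime)
{
	se->vruntime -= min_vruntime;	/* make it relative */
}

static void enqueue(struct entity *se, double min_vruntime)
{
	se->vruntime += min_vruntime;	/* place it at the current moment */
}

int main(void)
{
	struct entity se = { .vruntime = 1050.0 };

	dequeue(&se, 1000.0);		/* stored as +50 */
	enqueue(&se, 4000.0);		/* re-placed at 4050, not at the stale 1050 */
	printf("vruntime after requeue: %.1f\n", se.vruntime);
	return 0;
}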
@@ -1729,6 +1729,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
				     !cpumask_test_cpu(lowest_rq->cpu,
						       tsk_cpus_allowed(task)) ||
				     task_running(rq, task) ||
				     !rt_task(task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);

@@ -2095,8 +2095,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
	trace_create_file("filter", 0644, file->dir, file,
			  &ftrace_event_filter_fops);

	trace_create_file("trigger", 0644, file->dir, file,
			  &event_trigger_fops);
	/*
	 * Only event directories that can be enabled should have
	 * triggers.
	 */
	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
		trace_create_file("trigger", 0644, file->dir, file,
				  &event_trigger_fops);

	trace_create_file("format", 0444, file->dir, call,
			  &ftrace_event_format_fops);

@@ -666,6 +666,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
	 */
	smp_wmb();
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
	/*
	 * The following mb guarantees that previous clear of a PENDING bit
	 * will not be reordered with any speculative LOADS or STORES from
	 * work->current_func, which is executed afterwards. This possible
	 * reordering can lead to a missed execution on attempt to queue
	 * the same @work. E.g. consider this case:
	 *
	 *   CPU#0                        CPU#1
	 *   ---------------------------- --------------------------------
	 *
	 * 1 STORE event_indicated
	 * 2 queue_work_on() {
	 * 3   test_and_set_bit(PENDING)
	 * 4 }                            set_..._and_clear_pending() {
	 * 5                                set_work_data() # clear bit
	 * 6                                smp_mb()
	 * 7                              work->current_func() {
	 * 8                                LOAD event_indicated
	 *                                }
	 *
	 * Without an explicit full barrier speculative LOAD on line 8 can
	 * be executed before CPU#0 does STORE on line 1. If that happens,
	 * CPU#0 observes the PENDING bit is still set and new execution of
	 * a @work is not queued in a hope, that CPU#1 will eventually
	 * finish the queued @work. Meanwhile CPU#1 does not see
	 * event_indicated is set, because speculative LOAD was executed
	 * before actual STORE.
	 */
	smp_mb();
}

static void clear_work_data(struct work_struct *work)
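The comment added to set_work_pool_and_clear_pending() documents why a full barrier has to follow the clearing of PENDING: without it, the loads performed by the work function can be speculated ahead of the clear, and a concurrent queue_work_on() that still sees PENDING set skips requeueing while the running work misses the freshly stored data. A rough C11-atomics rendering of the two sides of that ordering requirement, with illustrative names rather than the workqueue API:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool pending;
static atomic_int  event_indicated;

/* Queueing side: publish the data, then try to claim PENDING. */
static bool queue_side(void)
{
	atomic_store(&event_indicated, 1);		/* step 1: STORE event */
	return !atomic_exchange(&pending, true);	/* step 3: test_and_set(PENDING) */
}

/* Execution side: clear PENDING, then run the work function body. */
static int work_side(void)
{
	atomic_store_explicit(&pending, false, memory_order_relaxed);	/* step 5: clear bit */
	atomic_thread_fence(memory_order_seq_cst);	/* step 6: the smp_mb() the patch documents */
	return atomic_load_explicit(&event_indicated, memory_order_relaxed); /* step 8: LOAD event */
}

int main(void)
{
	queue_side();
	return work_side() ? 0 : 1;	/* single-threaded: always observes the event */
}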