Merge branch 'linus' into sched/core, to resolve semantic conflict

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar
2020-11-27 11:09:57 +01:00
1807 changed files with 21317 additions and 37493 deletions

View File

@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o
CFLAGS_core.o += $(call cc-disable-warning, override-init)
ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o

View File

@@ -13,6 +13,7 @@
#include <linux/bpf_verifier.h>
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
#include <linux/btf_ids.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -26,7 +27,11 @@ noinline RET bpf_lsm_##NAME(__VA_ARGS__) \
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
#define BPF_LSM_SYM_PREFX "bpf_lsm_"
#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
BTF_SET_START(bpf_lsm_hooks)
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
BTF_SET_END(bpf_lsm_hooks)
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
@@ -37,8 +42,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return -EINVAL;
}
if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name,
sizeof(BPF_LSM_SYM_PREFX) - 1)) {
if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
prog->aux->attach_btf_id, prog->aux->attach_func_name);
return -EINVAL;

View File

@@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
*
* Decode and execute eBPF instructions.
*/
static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z

View File

@@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
}
}
static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
void *value, bool onallcpus)
{
/* When using prealloc and not setting the initial value on all cpus,
* zero-fill element values for other cpus (just as what happens when
* not using prealloc). Otherwise, bpf program has no way to ensure
* known initial values for cpus other than current one
* (onallcpus=false always when coming from bpf prog).
*/
if (htab_is_prealloc(htab) && !onallcpus) {
u32 size = round_up(htab->map.value_size, 8);
int current_cpu = raw_smp_processor_id();
int cpu;
for_each_possible_cpu(cpu) {
if (cpu == current_cpu)
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
size);
else
memset(per_cpu_ptr(pptr, cpu), 0, size);
}
} else {
pcpu_copy_value(htab, pptr, value, onallcpus);
}
}
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
}
}
pcpu_copy_value(htab, pptr, value, onallcpus);
pcpu_init_value(htab, pptr, value, onallcpus);
if (!prealloc)
htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
value, onallcpus);
} else {
pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;

View File

@@ -6,6 +6,7 @@ config USERMODE_DRIVER
menuconfig BPF_PRELOAD
bool "Preload BPF file system with kernel specific program and map iterators"
depends on BPF
depends on BPF_SYSCALL
# The dependency on !COMPILE_TEST prevents it from being enabled
# in allmodconfig or allyesconfig configurations
depends on !COMPILE_TEST

View File

@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
const bool is_subprog = env->cur_state->frame[0]->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
if (!is_subprog &&
(prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
prog_type == BPF_PROG_TYPE_LSM) &&
!prog->aux->attach_func_proto->type)
return 0;
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EACCES;
}
reg = cur_regs(env) + BPF_REG_0;
if (is_subprog) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
reg_type_str[reg->type]);
return -EINVAL;
}
return 0;
}
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
}
reg = cur_regs(env) + BPF_REG_0;
if (reg->type != SCALAR_VALUE) {
verbose(env, "At program exit the register R0 is not a known value (%s)\n",
reg_type_str[reg->type]);
@@ -9572,12 +9583,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
struct bpf_insn_aux_data *aux)
{
u32 datasec_id, type, id = insn->imm;
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
u32 type, id = insn->imm;
s32 datasec_id;
u64 addr;
int i;

View File

@@ -229,6 +229,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
no_iotlb_memory = false;
if (verbose)
swiotlb_print_info();
@@ -260,9 +261,11 @@ swiotlb_init(int verbose)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
return;
if (io_tlb_start)
if (io_tlb_start) {
memblock_free_early(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
io_tlb_start = 0;
}
pr_warn("Cannot allocate buffer");
no_iotlb_memory = true;
}
@@ -360,6 +363,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
no_iotlb_memory = false;
swiotlb_print_info();
@@ -441,14 +445,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
dma_addr_t tbl_dma_addr,
phys_addr_t orig_addr,
size_t mapping_size,
size_t alloc_size,
enum dma_data_direction dir,
unsigned long attrs)
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
unsigned long flags;
phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
@@ -667,9 +668,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
swiotlb_force);
swiotlb_addr = swiotlb_tbl_map_single(dev,
phys_to_dma_unencrypted(dev, io_tlb_start),
paddr, size, size, dir, attrs);
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;

View File

@@ -337,10 +337,10 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
* already contains a warning when RCU is not watching, so no point
* in having another one here.
*/
lockdep_hardirqs_off(CALLER_ADDR0);
instrumentation_begin();
rcu_irq_enter_check_tick();
/* Use the combo lockdep/tracing function */
trace_hardirqs_off();
trace_hardirqs_off_finish();
instrumentation_end();
return ret;

View File

@@ -2312,9 +2312,6 @@ group_sched_out(struct perf_event *group_event,
event_sched_out(event, cpuctx, ctx);
perf_pmu_enable(ctx->pmu);
if (group_event->attr.exclusive)
cpuctx->exclusive = 0;
}
#define DETACH_GROUP 0x01UL
@@ -2583,11 +2580,8 @@ group_sched_in(struct perf_event *group_event,
pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
if (event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu);
perf_mux_hrtimer_restart(cpuctx);
return -EAGAIN;
}
if (event_sched_in(group_event, cpuctx, ctx))
goto error;
/*
* Schedule in siblings as one group (if any):
@@ -2616,10 +2610,8 @@ group_error:
}
event_sched_out(group_event, cpuctx, ctx);
error:
pmu->cancel_txn(pmu);
perf_mux_hrtimer_restart(cpuctx);
return -EAGAIN;
}
@@ -2645,7 +2637,7 @@ static int group_can_go_on(struct perf_event *event,
* If this group is exclusive and there are already
* events on the CPU, it can't go on.
*/
if (event->attr.exclusive && cpuctx->active_oncpu)
if (event->attr.exclusive && !list_empty(get_event_list(event)))
return 0;
/*
* Otherwise, try to add it if all previous groups were able
@@ -3679,6 +3671,7 @@ static int merge_sched_in(struct perf_event *event, void *data)
*can_add_hw = 0;
ctx->rotate_necessary = 1;
perf_mux_hrtimer_restart(cpuctx);
}
return 0;
@@ -6374,14 +6367,13 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}
static void perf_sample_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
struct pt_regs *regs)
{
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
} else if (!(current->flags & PF_KTHREAD)) {
perf_get_regs_user(regs_user, regs, regs_user_copy);
perf_get_regs_user(regs_user, regs);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
@@ -7083,8 +7075,7 @@ void perf_prepare_sample(struct perf_event_header *header,
}
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
perf_sample_regs_user(&data->regs_user, regs,
&data->regs_user_copy);
perf_sample_regs_user(&data->regs_user, regs);
if (sample_type & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */
@@ -7186,6 +7177,7 @@ __perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs,
int (*output_begin)(struct perf_output_handle *,
struct perf_sample_data *,
struct perf_event *,
unsigned int))
{
@@ -7198,7 +7190,7 @@ __perf_event_output(struct perf_event *event,
perf_prepare_sample(&header, data, event, regs);
err = output_begin(&handle, event, header.size);
err = output_begin(&handle, data, event, header.size);
if (err)
goto exit;
@@ -7264,7 +7256,7 @@ perf_event_read_event(struct perf_event *event,
int ret;
perf_event_header__init_id(&read_event.header, &sample, event);
ret = perf_output_begin(&handle, event, read_event.header.size);
ret = perf_output_begin(&handle, &sample, event, read_event.header.size);
if (ret)
return;
@@ -7533,7 +7525,7 @@ static void perf_event_task_output(struct perf_event *event,
perf_event_header__init_id(&task_event->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
task_event->event_id.header.size);
if (ret)
goto out;
@@ -7636,7 +7628,7 @@ static void perf_event_comm_output(struct perf_event *event,
return;
perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
comm_event->event_id.header.size);
if (ret)
@@ -7736,7 +7728,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
namespaces_event->event_id.header.size);
if (ret)
goto out;
@@ -7863,7 +7855,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data)
perf_event_header__init_id(&cgroup_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
cgroup_event->event_id.header.size);
if (ret)
goto out;
@@ -7989,7 +7981,7 @@ static void perf_event_mmap_output(struct perf_event *event,
}
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
mmap_event->event_id.header.size);
if (ret)
goto out;
@@ -8299,7 +8291,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
int ret;
perf_event_header__init_id(&rec.header, &sample, event);
ret = perf_output_begin(&handle, event, rec.header.size);
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
if (ret)
return;
@@ -8333,7 +8325,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
perf_event_header__init_id(&lost_samples_event.header, &sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
lost_samples_event.header.size);
if (ret)
return;
@@ -8388,7 +8380,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data)
perf_event_header__init_id(&se->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event, se->event_id.header.size);
ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size);
if (ret)
return;
@@ -8463,7 +8455,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
perf_event_header__init_id(&throttle_event.header, &sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
throttle_event.header.size);
if (ret)
return;
@@ -8506,7 +8498,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data)
perf_event_header__init_id(&ksymbol_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, &sample, event,
ksymbol_event->event_id.header.size);
if (ret)
return;
@@ -8596,7 +8588,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
perf_event_header__init_id(&bpf_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
ret = perf_output_begin(&handle, data, event,
bpf_event->event_id.header.size);
if (ret)
return;
@@ -8705,7 +8697,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data)
perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
ret = perf_output_begin(&handle, &sample, event,
text_poke_event->event_id.header.size);
if (ret)
return;
@@ -8786,7 +8779,7 @@ static void perf_log_itrace_start(struct perf_event *event)
rec.tid = perf_event_tid(event, current);
perf_event_header__init_id(&rec.header, &sample, event);
ret = perf_output_begin(&handle, event, rec.header.size);
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
if (ret)
return;
@@ -10085,6 +10078,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
int fpos = token == IF_SRC_FILE ? 2 : 1;
kfree(filename);
filename = match_strdup(&args[fpos]);
if (!filename) {
ret = -ENOMEM;
@@ -10131,16 +10125,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
ret = -EOPNOTSUPP;
if (!event->ctx->task)
goto fail_free_name;
goto fail;
/* look up the path and grab its inode */
ret = kern_path(filename, LOOKUP_FOLLOW,
&filter->path);
if (ret)
goto fail_free_name;
kfree(filename);
filename = NULL;
goto fail;
ret = -EINVAL;
if (!filter->path.dentry ||
@@ -10160,13 +10151,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (state != IF_STATE_ACTION)
goto fail;
kfree(filename);
kfree(orig);
return 0;
fail_free_name:
kfree(filename);
fail:
kfree(filename);
free_filters_list(filters);
kfree(orig);

View File

@@ -205,16 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
static inline int get_recursion_context(int *recursion)
{
int rctx;
unsigned int pc = preempt_count();
unsigned char rctx = 0;
if (unlikely(in_nmi()))
rctx = 3;
else if (in_irq())
rctx = 2;
else if (in_softirq())
rctx = 1;
else
rctx = 0;
rctx += !!(pc & (NMI_MASK));
rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
if (recursion[rctx])
return -1;
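
The new get_recursion_context() above derives the context level straight from preempt_count(): each successively wider mask test contributes one, so a plain task scores 0, softirq 1, hardirq 2 and NMI 3, matching the if/else chain it replaces. A minimal user-space sketch of the same trick follows; the mask values are assumptions that only mimic the usual preempt_count() bit layout, not the kernel's headers.

/* Stand-alone sketch; mask values are illustrative, not kernel definitions. */
#include <stdio.h>

#define SOFTIRQ_OFFSET	(1U << 8)
#define HARDIRQ_MASK	(0xfU << 16)
#define NMI_MASK	(0xfU << 20)

static unsigned char recursion_context(unsigned int pc)
{
	unsigned char rctx = 0;

	/* Each test covers every level at or above the previous one. */
	rctx += !!(pc & NMI_MASK);					/* NMI only	*/
	rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));			/* + hardirq	*/
	rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));	/* + softirq	*/

	return rctx;	/* 0 = task, 1 = softirq, 2 = hardirq, 3 = NMI */
}

int main(void)
{
	printf("task    -> %u\n", recursion_context(0));
	printf("softirq -> %u\n", recursion_context(SOFTIRQ_OFFSET));
	printf("hardirq -> %u\n", recursion_context(1U << 16));
	printf("nmi     -> %u\n", recursion_context(1U << 20));
	return 0;
}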

View File

@@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
static __always_inline int
__perf_output_begin(struct perf_output_handle *handle,
struct perf_sample_data *data,
struct perf_event *event, unsigned int size,
bool backward)
{
@@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle,
handle->size = (1UL << page_shift) - offset;
if (unlikely(have_lost)) {
struct perf_sample_data sample_data;
lost_event.header.size = sizeof(lost_event);
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.id = event->id;
lost_event.lost = local_xchg(&rb->lost, 0);
perf_event_header__init_id(&lost_event.header,
&sample_data, event);
/* XXX mostly redundant; @data is already fully initialized */
perf_event_header__init_id(&lost_event.header, data, event);
perf_output_put(handle, lost_event);
perf_event__output_id_sample(event, handle, &sample_data);
perf_event__output_id_sample(event, handle, data);
}
return 0;
@@ -263,22 +262,25 @@ out:
}
int perf_output_begin_forward(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size)
struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
return __perf_output_begin(handle, event, size, false);
return __perf_output_begin(handle, data, event, size, false);
}
int perf_output_begin_backward(struct perf_output_handle *handle,
struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
return __perf_output_begin(handle, event, size, true);
return __perf_output_begin(handle, data, event, size, true);
}
int perf_output_begin(struct perf_output_handle *handle,
struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
return __perf_output_begin(handle, event, size,
return __perf_output_begin(handle, data, event, size,
unlikely(is_write_backward(event)));
}

View File

@@ -454,7 +454,10 @@ static void exit_mm(void)
mmap_read_unlock(mm);
self.task = current;
self.next = xchg(&core_state->dumper.next, &self);
if (self.task->flags & PF_SIGNALED)
self.next = xchg(&core_state->dumper.next, &self);
else
self.task = NULL;
/*
* Implies mb(), the result of xchg() must be visible
* to core_state->dumper.

View File

@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
if (copy_from_user(buf, buffer, count)) {
ret = -EFAULT;
goto out;
goto out_free;
}
buf[count] = '\0';
sym = strstrip(buf);
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
ret = count;
}
out:
kfree(buf);
mutex_unlock(&fei_lock);
out_free:
kfree(buf);
return ret;
}

View File

@@ -2167,14 +2167,9 @@ static __latent_entropy struct task_struct *copy_process(
/* ok, now we should be set up.. */
p->pid = pid_nr(pid);
if (clone_flags & CLONE_THREAD) {
p->exit_signal = -1;
p->group_leader = current->group_leader;
p->tgid = current->tgid;
} else {
if (clone_flags & CLONE_PARENT)
p->exit_signal = current->group_leader->exit_signal;
else
p->exit_signal = args->exit_signal;
p->group_leader = p;
p->tgid = p->pid;
}
@@ -2218,9 +2213,14 @@ static __latent_entropy struct task_struct *copy_process(
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
p->real_parent = current->real_parent;
p->parent_exec_id = current->parent_exec_id;
if (clone_flags & CLONE_THREAD)
p->exit_signal = -1;
else
p->exit_signal = current->group_leader->exit_signal;
} else {
p->real_parent = current;
p->parent_exec_id = current->self_exec_id;
p->exit_signal = args->exit_signal;
}
klp_copy_process(p);

View File

@@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi_state *pi_state)
*/
if (pi_state->owner) {
struct task_struct *owner;
unsigned long flags;
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
owner = pi_state->owner;
if (owner) {
raw_spin_lock(&owner->pi_lock);
@@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
raw_spin_unlock(&owner->pi_lock);
}
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache) {
@@ -1503,8 +1504,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
*/
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
if (unlikely(should_fail_futex(true)))
if (unlikely(should_fail_futex(true))) {
ret = -EFAULT;
goto out_unlock;
}
ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
if (!ret && (curval != uval)) {
@@ -2378,10 +2381,22 @@ retry:
}
/*
* Since we just failed the trylock; there must be an owner.
* The trylock just failed, so either there is an owner or
* there is a higher priority waiter than this one.
*/
newowner = rt_mutex_owner(&pi_state->pi_mutex);
BUG_ON(!newowner);
/*
* If the higher priority waiter has not yet taken over the
* rtmutex then newowner is NULL. We can't return here with
* that state because it's inconsistent vs. the user space
* state. So drop the locks and try again. It's a valid
* situation and not any different from the other retry
* conditions.
*/
if (unlikely(!newowner)) {
err = -EAGAIN;
goto handle_err;
}
} else {
WARN_ON_ONCE(argowner != current);
if (oldowner == current) {

View File

@@ -225,8 +225,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;

View File

@@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
select IRQ_DOMAIN_HIERARCHY
# Generic MSI interrupt support
config GENERIC_MSI_IRQ

View File

@@ -1249,7 +1249,13 @@ __acquires(hlist_lock)
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
raw_spin_lock_irqsave(hlist_lock, *flags);
/*
* Nested is a workaround that will soon not be needed.
* There's other protections that make sure the same lock
* is not taken on the same CPU that lockdep is unaware of.
* Differentiate when it is taken in NMI context.
*/
raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
}
NOKPROBE_SYMBOL(kretprobe_hash_lock);
@@ -1258,7 +1264,13 @@ static void kretprobe_table_lock(unsigned long hash,
__acquires(hlist_lock)
{
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
raw_spin_lock_irqsave(hlist_lock, *flags);
/*
* Nested is a workaround that will soon not be needed.
* There's other protections that make sure the same lock
* is not taken on the same CPU that lockdep is unaware of.
* Differentiate when it is taken in NMI context.
*/
raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
}
NOKPROBE_SYMBOL(kretprobe_table_lock);
@@ -2028,7 +2040,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
raw_spin_lock_irqsave(&rp->lock, flags);
/*
* Nested is a workaround that will soon not be needed.
* There's other protections that make sure the same lock
* is not taken on the same CPU that lockdep is unaware of.
*/
raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
@@ -2039,7 +2056,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
ri->task = current;
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
raw_spin_lock_irqsave(&rp->lock, flags);
raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
hlist_add_head(&ri->hlist, &rp->free_instances);
raw_spin_unlock_irqrestore(&rp->lock, flags);
return 0;

View File

@@ -897,7 +897,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
/* Move the work from worker->delayed_work_list. */
WARN_ON_ONCE(list_empty(&work->node));
list_del_init(&work->node);
kthread_insert_work(worker, work, &worker->work_list);
if (!work->canceling)
kthread_insert_work(worker, work, &worker->work_list);
raw_spin_unlock_irqrestore(&worker->lock, flags);
}

View File

@@ -84,7 +84,7 @@ static inline bool lockdep_enabled(void)
if (!debug_locks)
return false;
if (raw_cpu_read(lockdep_recursion))
if (this_cpu_read(lockdep_recursion))
return false;
if (current->lockdep_recursion)
@@ -108,19 +108,21 @@ static inline void lockdep_lock(void)
{
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
__this_cpu_inc(lockdep_recursion);
arch_spin_lock(&__lock);
__owner = current;
__this_cpu_inc(lockdep_recursion);
}
static inline void lockdep_unlock(void)
{
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current))
return;
__this_cpu_dec(lockdep_recursion);
__owner = NULL;
arch_spin_unlock(&__lock);
__this_cpu_dec(lockdep_recursion);
}
static inline bool lockdep_assert_locked(void)
@@ -2765,7 +2767,9 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
* (Note that this has to be done separately, because the graph cannot
* detect such classes of deadlocks.)
*
* Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
* Returns: 0 on deadlock detected, 1 on OK, 2 if another lock with the same
* lock class is held but nest_lock is also held, i.e. we rely on the
* nest_lock to avoid the deadlock.
*/
static int
check_deadlock(struct task_struct *curr, struct held_lock *next)
@@ -2788,7 +2792,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next)
* lock class (i.e. read_lock(lock)+read_lock(lock)):
*/
if ((next->read == 2) && prev->read)
return 2;
continue;
/*
* We're holding the nest_lock, which serializes this lock's
@@ -3592,16 +3596,13 @@ static int validate_chain(struct task_struct *curr,
if (!ret)
return 0;
/*
* Mark recursive read, as we jump over it when
* building dependencies (just like we jump over
* trylock entries):
*/
if (ret == 2)
hlock->read = 2;
/*
* Add dependency only if this lock is not the head
* of the chain, and if it's not a secondary read-lock:
* of the chain, and if the new lock introduces no more
* lock dependency (because we already hold a lock with the
* same lock class) nor deadlock (because the nest_lock
* serializes nesting locks), see the comments for
* check_deadlock().
*/
if (!chain_head && ret != 2) {
if (!check_prevs_add(curr, hlock))
@@ -4057,7 +4058,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(in_nmi()))
return;
if (unlikely(__this_cpu_read(lockdep_recursion)))
if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (unlikely(lockdep_hardirqs_enabled())) {
@@ -4126,7 +4127,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
goto skip_checks;
}
if (unlikely(__this_cpu_read(lockdep_recursion)))
if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (lockdep_hardirqs_enabled()) {
@@ -4396,6 +4397,9 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
if (unlikely(hlock_class(this)->usage_mask & new_mask))
goto unlock;
if (!hlock_class(this)->usage_mask)
debug_atomic_dec(nr_unused_locks);
hlock_class(this)->usage_mask |= new_mask;
if (new_bit < LOCK_TRACE_STATES) {
@@ -4403,19 +4407,10 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
}
switch (new_bit) {
case 0 ... LOCK_USED-1:
if (new_bit < LOCK_USED) {
ret = mark_lock_irq(curr, this, new_bit);
if (!ret)
return 0;
break;
case LOCK_USED:
debug_atomic_dec(nr_unused_locks);
break;
default:
break;
}
unlock:

View File

@@ -605,7 +605,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
panic("panic_on_warn set ...\n");
}
dump_stack();
if (!regs)
dump_stack();
print_irqtrace_events(current);

View File

@@ -530,7 +530,7 @@ struct module_param_attrs
{
unsigned int num;
struct attribute_group grp;
struct param_attribute attrs[0];
struct param_attribute attrs[];
};
#ifdef CONFIG_SYSFS

View File

@@ -146,7 +146,7 @@ int freeze_processes(void)
BUG_ON(in_atomic());
/*
* Now that the whole userspace is frozen we need to disbale
* Now that the whole userspace is frozen we need to disable
* the OOM killer to disallow any further interference with
* killable tasks. There is no guarantee oom victims will
* ever reach a point they go away we have to wait with a timeout.

View File

@@ -345,7 +345,7 @@ DESC_ID((id) - DESCS_COUNT(desc_ring))
*/
struct prb_data_block {
unsigned long id;
char data[0];
char data[];
};
/*

View File

@@ -264,17 +264,11 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
return ret;
}
static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns,
unsigned int mode)
static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
{
int ret;
if (mode & PTRACE_MODE_NOAUDIT)
ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT);
else
ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE);
return ret == 0;
return ns_capable_noaudit(ns, CAP_SYS_PTRACE);
return ns_capable(ns, CAP_SYS_PTRACE);
}
/* Returns 0 on success, -errno on denial. */
@@ -326,7 +320,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
gid_eq(caller_gid, tcred->sgid) &&
gid_eq(caller_gid, tcred->gid))
goto ok;
if (ptrace_has_cap(cred, tcred->user_ns, mode))
if (ptrace_has_cap(tcred->user_ns, mode))
goto ok;
rcu_read_unlock();
return -EPERM;
@@ -345,7 +339,7 @@ ok:
mm = task->mm;
if (mm &&
((get_dumpable(mm) != SUID_DUMP_USER) &&
!ptrace_has_cap(cred, mm->user_ns, mode)))
!ptrace_has_cap(mm->user_ns, mode)))
return -EPERM;
return security_ptrace_access_check(task, mode);

View File

@@ -409,7 +409,7 @@ bool rcu_eqs_special_set(int cpu)
*
* The caller must have disabled interrupts and must not be idle.
*/
void rcu_momentary_dyntick_idle(void)
notrace void rcu_momentary_dyntick_idle(void)
{
int special;
@@ -4076,7 +4076,6 @@ void rcu_cpu_starting(unsigned int cpu)
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* The outgoing function has no further need of RCU, so remove it from
* the rcu_node tree's ->qsmaskinitnext bit masks.
@@ -4116,6 +4115,7 @@ void rcu_report_dead(unsigned int cpu)
rdp->cpu_started = false;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* The outgoing CPU has just passed through the dying-idle state, and we
* are being invoked from the CPU that was IPIed to continue the offline

View File

@@ -249,13 +249,16 @@ static bool check_slow_task(struct task_struct *t, void *arg)
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, printing out the tid of each.
* sections, printing out the tid of each of the first few of them.
*/
static int rcu_print_task_stall(struct rcu_node *rnp)
static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
int i = 0;
int ndetected = 0;
struct rcu_stall_chk_rdr rscr;
struct task_struct *t;
struct task_struct *ts[8];
if (!rcu_preempt_blocked_readers_cgp(rnp))
return 0;
@@ -264,6 +267,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
t = list_entry(rnp->gp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
get_task_struct(t);
ts[i++] = t;
if (i >= ARRAY_SIZE(ts))
break;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
for (i--; i; i--) {
t = ts[i];
if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
pr_cont(" P%d", t->pid);
else
@@ -273,6 +284,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
".q"[rscr.rs.b.need_qs],
".e"[rscr.rs.b.exp_hint],
".l"[rscr.on_blkd_list]);
put_task_struct(t);
ndetected++;
}
pr_cont("\n");
@@ -293,8 +305,9 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
* Because preemptible RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/
static int rcu_print_task_stall(struct rcu_node *rnp)
static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
{
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
@@ -472,7 +485,6 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
rcu_for_each_leaf_node(rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for_each_leaf_node_possible_cpu(rnp, cpu)
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
@@ -480,7 +492,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
ndetected++;
}
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
}
for_each_possible_cpu(cpu)

View File

@@ -551,22 +551,22 @@ static int __init reboot_setup(char *str)
break;
case 's':
{
int rc;
if (isdigit(*(str+1))) {
rc = kstrtoint(str+1, 0, &reboot_cpu);
if (rc)
return rc;
} else if (str[1] == 'm' && str[2] == 'p' &&
isdigit(*(str+3))) {
rc = kstrtoint(str+3, 0, &reboot_cpu);
if (rc)
return rc;
} else
if (isdigit(*(str+1)))
reboot_cpu = simple_strtoul(str+1, NULL, 0);
else if (str[1] == 'm' && str[2] == 'p' &&
isdigit(*(str+3)))
reboot_cpu = simple_strtoul(str+3, NULL, 0);
else
*mode = REBOOT_SOFT;
if (reboot_cpu >= num_possible_cpus()) {
pr_err("Ignoring the CPU number in reboot= option. "
"CPU %d exceeds possible cpu number %d\n",
reboot_cpu, num_possible_cpus());
reboot_cpu = 0;
break;
}
break;
}
case 'g':
*mode = REBOOT_GPIO;
break;

View File

@@ -2976,7 +2976,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
#ifdef CONFIG_SMP
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
else
#endif
if (p->in_iowait) {
delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
}
activate_task(rq, p, en_flags);
ttwu_do_wakeup(rq, p, wake_flags, rf);
@@ -3363,11 +3368,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
goto unlock;
if (p->in_iowait) {
delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
}
#ifdef CONFIG_SMP
/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
@@ -3438,6 +3438,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
if (task_cpu(p) != cpu) {
if (p->in_iowait) {
delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
}
wake_flags |= WF_MIGRATED;
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
@@ -5432,20 +5437,21 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
if (!dl_prio(p->normal_prio) ||
(pi_task && dl_prio(pi_task->prio) &&
dl_entity_preempt(&pi_task->dl, &p->dl))) {
p->dl.dl_boosted = 1;
p->dl.pi_se = pi_task->dl.pi_se;
queue_flag |= ENQUEUE_REPLENISH;
} else
p->dl.dl_boosted = 0;
} else {
p->dl.pi_se = &p->dl;
}
p->sched_class = &dl_sched_class;
} else if (rt_prio(prio)) {
if (dl_prio(oldprio))
p->dl.dl_boosted = 0;
p->dl.pi_se = &p->dl;
if (oldprio < prio)
queue_flag |= ENQUEUE_HEAD;
p->sched_class = &rt_sched_class;
} else {
if (dl_prio(oldprio))
p->dl.dl_boosted = 0;
p->dl.pi_se = &p->dl;
if (rt_prio(oldprio))
p->rt.timeout = 0;
p->sched_class = &fair_sched_class;

View File

@@ -102,8 +102,12 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
unsigned int next_freq)
{
if (sg_policy->next_freq == next_freq)
return false;
if (!sg_policy->need_freq_update) {
if (sg_policy->next_freq == next_freq)
return false;
} else {
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
}
sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;
@@ -164,7 +168,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
return sg_policy->next_freq;
sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = freq;
return cpufreq_driver_resolve_freq(policy, freq);
}
@@ -440,7 +443,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long util, max;
unsigned int next_f;
bool busy;
unsigned int cached_freq = sg_policy->cached_raw_freq;
sugov_iowait_boost(sg_cpu, time, flags);
@@ -451,9 +453,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (!sugov_should_update_freq(sg_policy, time))
return;
/* Limits may have changed, don't skip frequency update */
busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
util = sugov_iowait_apply(sg_cpu, time, util, max);
@@ -462,7 +461,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
if (busy && next_f < sg_policy->next_freq) {
if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
next_f = sg_policy->next_freq;
/* Restore cached freq as next_freq has changed */
@@ -827,9 +826,10 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->next_freq = 0;
sg_policy->work_in_progress = false;
sg_policy->limits_changed = false;
sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = 0;
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
@@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
struct cpufreq_governor schedutil_gov = {
.name = "schedutil",
.owner = THIS_MODULE,
.dynamic_switching = true,
.flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
.init = sugov_init,
.exit = sugov_exit,
.start = sugov_start,

View File

@@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
return !RB_EMPTY_NODE(&dl_se->rb_node);
}
#ifdef CONFIG_RT_MUTEXES
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
return dl_se->pi_se;
}
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
return pi_of(dl_se) != dl_se;
}
#else
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
return dl_se;
}
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
return false;
}
#endif
#ifdef CONFIG_SMP
static inline struct dl_bw *dl_bw_of(int i)
{
@@ -714,7 +736,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
WARN_ON(dl_se->dl_boosted);
WARN_ON(is_dl_boosted(dl_se));
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
/*
@@ -752,21 +774,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
* could happen are, typically, a entity voluntarily trying to overcome its
* runtime, or it just underestimated it during sched_setattr().
*/
static void replenish_dl_entity(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se)
static void replenish_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
BUG_ON(pi_se->dl_runtime <= 0);
BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
/*
* This could be the case for a !-dl task that is boosted.
* Just go with full inherited parameters.
*/
if (dl_se->dl_deadline == 0) {
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
dl_se->runtime = pi_se->dl_runtime;
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -779,8 +800,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* arbitrary large.
*/
while (dl_se->runtime <= 0) {
dl_se->deadline += pi_se->dl_period;
dl_se->runtime += pi_se->dl_runtime;
dl_se->deadline += pi_of(dl_se)->dl_period;
dl_se->runtime += pi_of(dl_se)->dl_runtime;
}
/*
@@ -794,8 +815,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
printk_deferred_once("sched: DL replenish lagged too much\n");
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
dl_se->runtime = pi_se->dl_runtime;
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
if (dl_se->dl_yielded)
@@ -828,8 +849,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* task with deadline equal to period this is the same of using
* dl_period instead of dl_deadline in the equation above.
*/
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, u64 t)
static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
{
u64 left, right;
@@ -851,9 +871,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
* of anything below microseconds resolution is actually fiction
* (but still we want to give the user that illusion >;).
*/
left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
right = ((dl_se->deadline - t) >> DL_SCALE) *
(pi_se->dl_runtime >> DL_SCALE);
(pi_of(dl_se)->dl_runtime >> DL_SCALE);
return dl_time_before(right, left);
}
@@ -938,24 +958,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
* Please refer to the comments update_dl_revised_wakeup() function to find
* more about the Revised CBS rule.
*/
static void update_dl_entity(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se)
static void update_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
dl_entity_overflow(dl_se, rq_clock(rq))) {
if (unlikely(!dl_is_implicit(dl_se) &&
!dl_time_before(dl_se->deadline, rq_clock(rq)) &&
!dl_se->dl_boosted)){
!is_dl_boosted(dl_se))) {
update_dl_revised_wakeup(dl_se, rq);
return;
}
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
dl_se->runtime = pi_se->dl_runtime;
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
}
@@ -1054,7 +1073,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* The task might have been boosted by someone else and might be in the
* boosting/deboosting path, its not throttled.
*/
if (dl_se->dl_boosted)
if (is_dl_boosted(dl_se))
goto unlock;
/*
@@ -1082,7 +1101,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* but do not enqueue -- wait for our wakeup to do that.
*/
if (!task_on_rq_queued(p)) {
replenish_dl_entity(dl_se, dl_se);
replenish_dl_entity(dl_se);
goto unlock;
}
@@ -1172,7 +1191,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
return;
dl_se->dl_throttled = 1;
if (dl_se->runtime > 0)
@@ -1303,7 +1322,7 @@ throttle:
dl_se->dl_overrun = 1;
__dequeue_task_dl(rq, curr, 0);
if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
if (!is_leftmost(curr, &rq->dl))
@@ -1500,8 +1519,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
}
static void
enqueue_dl_entity(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, int flags)
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
BUG_ON(on_dl_rq(dl_se));
@@ -1512,9 +1530,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
*/
if (flags & ENQUEUE_WAKEUP) {
task_contending(dl_se, flags);
update_dl_entity(dl_se, pi_se);
update_dl_entity(dl_se);
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se, pi_se);
replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
dl_time_before(dl_se->deadline,
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1531,19 +1549,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
struct sched_dl_entity *pi_se = &p->dl;
/*
* Use the scheduling parameters of the top pi-waiter task if:
* - we have a top pi-waiter which is a SCHED_DEADLINE task AND
* - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
* smaller than our deadline OR we are a !SCHED_DEADLINE task getting
* boosted due to a SCHED_DEADLINE pi-waiter).
* Otherwise we keep our runtime and deadline.
*/
if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
pi_se = &pi_task->dl;
if (is_dl_boosted(&p->dl)) {
/*
* Because of delays in the detection of the overrun of a
* thread's runtime, it might be the case that a thread
@@ -1576,7 +1582,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* the throttle.
*/
p->dl.dl_throttled = 0;
BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
return;
}
@@ -1613,7 +1619,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
return;
}
enqueue_dl_entity(&p->dl, pi_se, flags);
enqueue_dl_entity(&p->dl, flags);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
@@ -2832,11 +2838,14 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_bw = 0;
dl_se->dl_density = 0;
dl_se->dl_boosted = 0;
dl_se->dl_throttled = 0;
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;
dl_se->dl_overrun = 0;
#ifdef CONFIG_RT_MUTEXES
dl_se->pi_se = dl_se;
#endif
}
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)

View File

@@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
unsigned long flags = *(unsigned long *)table->data;
size_t data_size = 0;
size_t len = 0;
char *tmp;
char *tmp, *buf;
int idx;
if (write)
@@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
return 0;
}
tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
if (!tmp)
buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;
for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
char *name = sd_flag_debug[idx].name;
len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
}
tmp += *ppos;
tmp = buf + *ppos;
len -= *ppos;
if (len > *lenp)
@@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
*lenp = len;
*ppos += len;
kfree(tmp);
kfree(buf);
return 0;
}

View File

@@ -5497,6 +5497,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
int idle_h_nr_running = task_has_idle_policy(p);
int task_new = !(flags & ENQUEUE_WAKEUP);
/*
* The code below (indirectly) updates schedutil which looks at
@@ -5569,7 +5570,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* into account, but that is not straightforward to implement,
* and the following generally works well enough in practice.
*/
if (flags & ENQUEUE_WAKEUP)
if (!task_new)
update_overutilized_status(rq);
enqueue_throttle:
@@ -6195,21 +6196,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
static int
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
{
unsigned long best_cap = 0;
unsigned long task_util, best_cap = 0;
int cpu, best_cpu = -1;
struct cpumask *cpus;
sync_entity_load_avg(&p->se);
cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
task_util = uclamp_task_util(p);
for_each_cpu_wrap(cpu, cpus, target) {
unsigned long cpu_cap = capacity_of(cpu);
if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
continue;
if (task_fits_capacity(p, cpu_cap))
if (fits_capacity(task_util, cpu_cap))
return cpu;
if (cpu_cap > best_cap) {
@@ -6221,44 +6222,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
return best_cpu;
}
static inline bool asym_fits_capacity(int task_util, int cpu)
{
if (static_branch_unlikely(&sched_asym_cpucapacity))
return fits_capacity(task_util, capacity_of(cpu));
return true;
}
/*
* Try and locate an idle core/thread in the LLC cache domain.
*/
static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
unsigned long task_util;
int i, recent_used_cpu;
/*
* For asymmetric CPU capacity systems, our domain of interest is
* sd_asym_cpucapacity rather than sd_llc.
* On asymmetric system, update task utilization because we will check
* that the task fits with cpu's capacity.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
/*
* On an asymmetric CPU capacity system where an exclusive
* cpuset defines a symmetric island (i.e. one unique
* capacity_orig value through the cpuset), the key will be set
* but the CPUs within that cpuset will not have a domain with
* SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
* capacity path.
*/
if (!sd)
goto symmetric;
i = select_idle_capacity(p, sd, target);
return ((unsigned)i < nr_cpumask_bits) ? i : target;
sync_entity_load_avg(&p->se);
task_util = uclamp_task_util(p);
}
symmetric:
if (available_idle_cpu(target) || sched_idle_cpu(target))
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
asym_fits_capacity(task_util, target))
return target;
/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
if (prev != target && cpus_share_cache(prev, target) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
(available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
asym_fits_capacity(task_util, prev))
return prev;
/*
@@ -6281,7 +6280,8 @@ symmetric:
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
asym_fits_capacity(task_util, recent_used_cpu)) {
/*
* Replace recent_used_cpu with prev as it is a potential
* candidate for the next wake:
@@ -6290,6 +6290,26 @@ symmetric:
return recent_used_cpu;
}
/*
* For asymmetric CPU capacity systems, our domain of interest is
* sd_asym_cpucapacity rather than sd_llc.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
/*
* On an asymmetric CPU capacity system where an exclusive
* cpuset defines a symmetric island (i.e. one unique
* capacity_orig value through the cpuset), the key will be set
* but the CPUs within that cpuset will not have a domain with
* SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
* capacity path.
*/
if (sd) {
i = select_idle_capacity(p, sd, target);
return ((unsigned)i < nr_cpumask_bits) ? i : target;
}
}
sd = rcu_dereference(per_cpu(sd_llc, target));
if (!sd)
return target;
@@ -9070,7 +9090,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* emptying busiest.
*/
if (local->group_type == group_has_spare) {
if (busiest->group_type > group_fully_busy) {
if ((busiest->group_type > group_fully_busy) &&
!(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
/*
* If busiest is overloaded, try to fill spare
* capacity. This might end up creating spare capacity

View File

@@ -38,7 +38,7 @@
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/capability.h>
#include <linux/tracehook.h>
#include <linux/uaccess.h>
#include <linux/anon_inodes.h>
@@ -558,8 +558,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
* behavior of privileged children.
*/
if (!task_no_new_privs(current) &&
security_capable(current_cred(), current_user_ns(),
CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0)
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
/* Allocate a new seccomp_filter */

View File

@@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task)
void task_join_group_stop(struct task_struct *task)
{
unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK;
struct signal_struct *sig = current->signal;
if (sig->group_stop_count) {
sig->group_stop_count++;
mask |= JOBCTL_STOP_CONSUME;
} else if (!(sig->flags & SIGNAL_STOP_STOPPED))
return;
/* Have the new thread join an on-going signal group stop */
unsigned long jobctl = current->jobctl;
if (jobctl & JOBCTL_STOP_PENDING) {
struct signal_struct *sig = current->signal;
unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
if (task_set_jobctl_pending(task, signr | gstop)) {
sig->group_stop_count++;
}
}
task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
}
/*

View File

@@ -130,7 +130,7 @@ static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
csd_type = CSD_TYPE(csd);
if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
return -1;
}

View File

@@ -194,7 +194,7 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}
void __weak stop_machine_yield(const struct cpumask *cpumask)
notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
{
cpu_relax();
}

View File

@@ -425,11 +425,6 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
debug_object_deactivate(timer, &hrtimer_debug_descr);
}
static inline void debug_hrtimer_free(struct hrtimer *timer)
{
debug_object_free(timer, &hrtimer_debug_descr);
}
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode);

View File

@@ -172,10 +172,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
/*
* Use the to_ktime conversion because that clamps the maximum
* value to KTIME_MAX and avoid multiplication overflows.
*/
nval = timespec64_to_ns(&value->it_value);
ninterval = timespec64_to_ns(&value->it_interval);

View File

@@ -68,13 +68,13 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
return (cyc * mult) >> shift;
}
struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
{
*seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
}
int sched_clock_read_retry(unsigned int seq)
notrace int sched_clock_read_retry(unsigned int seq)
{
return read_seqcount_latch_retry(&cd.seq, seq);
}

View File

@@ -732,11 +732,6 @@ static inline void debug_timer_deactivate(struct timer_list *timer)
debug_object_deactivate(timer, &timer_debug_descr);
}
static inline void debug_timer_free(struct timer_list *timer)
{
debug_object_free(timer, &timer_debug_descr);
}
static inline void debug_timer_assert_init(struct timer_list *timer)
{
debug_object_assert_init(timer, &timer_debug_descr);

View File

@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
{
int ret;
/*
* NB: We rely on strncpy_from_user() not copying junk past the NUL
* terminator into `dst`.
*
* strncpy_from_user() does long-sized strides in the fast path. If the
* strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
* then there could be junk after the NUL in `dst`. If user takes `dst`
* and keys a hash map with it, then semantically identical strings can
* occupy multiple entries in the map.
*/
ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
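
The comment block added above is the whole rationale: strncpy_from_user() copies in long-sized strides, and if the bytes past the NUL are not cleared, `dst` keeps whatever it held before, so two semantically identical strings yield different fixed-size map keys. A small stand-alone illustration of that effect (plain C, not kernel code):

/* Sketch only: shows why uncleared bytes past the NUL break byte-wise keys. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char key_a[8], key_b[8];

	memset(key_a, 0xaa, sizeof(key_a));	/* stale bytes from an earlier use */
	memset(key_b, 0xbb, sizeof(key_b));

	/* Copy "hi" plus its NUL, but leave the tail of each buffer untouched. */
	memcpy(key_a, "hi", 3);
	memcpy(key_b, "hi", 3);

	printf("equal as C strings:       %d\n", strcmp(key_a, key_b) == 0);
	printf("equal as 8-byte map keys: %d\n", memcmp(key_a, key_b, sizeof(key_a)) == 0);
	return 0;
}
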
@@ -1198,7 +1208,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
*btf = bpf_get_btf_vmlinux();
if (IS_ERR_OR_NULL(*btf))
return PTR_ERR(*btf);
return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
if (ptr->type_id > 0)
*btf_id = ptr->type_id;
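To make the duplicate-key concern in the comment above concrete, here is a small stand-alone illustration (not from the patch): two buffers holding the same C string but differing after the NUL terminator compare unequal under memcmp(), which is how fixed-size hash-map keys are compared.

#include <stdio.h>
#include <string.h>

int main(void)
{
        char a[8] = "abc";                     /* bytes after the NUL are zero */
        char b[8] = { 'a', 'b', 'c', 0, 'X' }; /* same string, junk after NUL  */

        /* strcmp() sees equal strings, memcmp() over the full key does not,
         * so "abc" could occupy two entries in a map keyed on the buffer.
         */
        printf("strcmp=%d memcmp!=0: %d\n", strcmp(a, b), memcmp(a, b, 8) != 0);
        return 0;
}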

View File

@@ -438,14 +438,16 @@ enum {
};
/*
* Used for which event context the event is in.
* NMI = 0
* IRQ = 1
* SOFTIRQ = 2
* NORMAL = 3
* TRANSITION = 0
* NMI = 1
* IRQ = 2
* SOFTIRQ = 3
* NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -3014,10 +3016,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
* bit 0 = NMI context
* bit 1 = IRQ context
* bit 2 = SoftIRQ context
* bit 3 = normal context.
* bit 1 = NMI context
* bit 2 = IRQ context
* bit 3 = SoftIRQ context
* bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3042,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
*
* Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
* is set when a recursion is detected at the current context, and if
* the TRANSITION bit is already set, it will fail the recursion.
* This is needed because there's a lag between the changing of
* interrupt context and updating the preempt count. In this case,
* a false positive will be found. To handle this, one extra recursion
* is allowed, and this is done by the TRANSITION bit. If the TRANSITION
* bit is already set, then it is considered a recursion and the function
* ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
*
* On the trace_recursive_unlock(), the TRANSITION bit will be the first
* to be cleared. Even if it wasn't the context that set it. That is,
* if an interrupt comes in while NORMAL bit is set and the ring buffer
* is called before preempt_count() is updated, since the check will
* be on the NORMAL bit, the TRANSITION bit will then be set. If an
* NMI then comes in, it will set the NMI bit, but when the NMI code
* does the trace_recursive_unlock() it will clear the TRANSITION bit
* and leave the NMI bit set. But this is fine, because the interrupt
* code that set the TRANSITION bit will then clear the NMI bit when it
* calls trace_recursive_unlock(). If another NMI comes in, it will
* set the TRANSITION bit and continue.
*
* Note: The TRANSITION bit only handles a single transition between contexts.
*/
static __always_inline int
@@ -3055,8 +3081,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = pc & NMI_MASK ? RB_CTX_NMI :
pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
return 1;
if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
/*
* It is possible that this was called by transitioning
* between interrupt context, and preempt_count() has not
* been updated yet. In this case, use the TRANSITION bit.
*/
bit = RB_CTX_TRANSITION;
if (val & (1 << (bit + cpu_buffer->nest)))
return 1;
}
val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;
@@ -3071,8 +3105,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->current_context - (1 << cpu_buffer->nest);
}
/* The recursive locking above uses 4 bits */
#define NESTED_BITS 4
/* The recursive locking above uses 5 bits */
#define NESTED_BITS 5
/**
* ring_buffer_nest_start - Allow to trace while nested
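A plain-C sketch of the per-context recursion bitmask described in the comments above, including the one-shot TRANSITION allowance. The real code derives the context bit from preempt_count() and offsets everything by cpu_buffer->nest; both are omitted here for brevity.

enum { RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, RB_CTX_NORMAL };

static unsigned long current_context;

static int recursive_lock(int ctx_bit)  /* ctx_bit: caller's RB_CTX_* guess */
{
        unsigned long val = current_context;

        if (val & (1UL << ctx_bit)) {
                /* preempt_count() may not have caught up with a context
                 * switch yet: allow one extra level via the TRANSITION bit.
                 */
                ctx_bit = RB_CTX_TRANSITION;
                if (val & (1UL << ctx_bit))
                        return -1;      /* genuine recursion, reject */
        }
        current_context = val | (1UL << ctx_bit);
        return 0;
}

static void recursive_unlock(void)
{
        /* Clearing the least significant set bit drops the context entered
         * most recently (higher-priority contexts use lower bits).
         */
        current_context &= current_context - 1;
}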

View File

@@ -2750,7 +2750,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
* to store the trace event for the tigger to use. It's recusive
* to store the trace event for the trigger to use. It's recursive
* safe and will not be recorded anywhere.
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
@@ -2952,7 +2952,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
/* This should never happen. If it does, yell once and skip */
if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
goto out;
/*
@@ -3132,7 +3132,7 @@ static char *get_trace_buf(void)
/* Interrupts must see nesting incremented before we use the buffer */
barrier();
return &buffer->buffer[buffer->nesting][0];
return &buffer->buffer[buffer->nesting - 1][0];
}
static void put_trace_buf(void)
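The index fix above follows from the increment-before-use pairing of the get/put helpers; a condensed, hedged sketch of that pairing (one global array standing in for the real per-CPU structure):

#define barrier() __asm__ __volatile__("" ::: "memory")

static char bufs[4][128];
static int nesting;

static char *example_get_buf(void)
{
        if (nesting >= 4)
                return NULL;

        nesting++;
        barrier();      /* interrupts must see the bump before the buffer is used */
        return bufs[nesting - 1];       /* slot just reserved, hence "- 1" */
}

static void example_put_buf(void)
{
        barrier();
        nesting--;
}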

View File

@@ -637,6 +637,12 @@ enum {
* function is called to clear it.
*/
TRACE_GRAPH_NOTRACE_BIT,
/*
* When transitioning between contexts, the preempt_count() may
* not be correct. Allow for a single recursion to cover this case.
*/
TRACE_TRANSITION_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -691,14 +697,27 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
return 0;
bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit)))
return -1;
if (unlikely(val & (1 << bit))) {
/*
* It could be that preempt_count has not been updated during
* a switch between contexts. Allow for a single recursion.
*/
bit = TRACE_TRANSITION_BIT;
if (trace_recursion_test(bit))
return -1;
trace_recursion_set(bit);
barrier();
return bit + 1;
}
/* Normal check passed, clear the transition to allow it again */
trace_recursion_clear(TRACE_TRANSITION_BIT);
val |= 1 << bit;
current->trace_recursion = val;
barrier();
return bit;
return bit + 1;
}
static __always_inline void trace_clear_recursion(int bit)
@@ -708,6 +727,7 @@ static __always_inline void trace_clear_recursion(int bit)
if (!bit)
return;
bit--;
bit = 1 << bit;
val &= ~bit;
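For reference, a hedged sketch of how a function-trace callback pairs these helpers (TRACE_FTRACE_START and TRACE_FTRACE_MAX are existing trace.h constants; the callback body itself is illustrative only):

static void example_trace_callback(void)
{
        int bit;

        bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
        if (bit < 0)
                return;         /* recursing in this context, TRANSITION used up */

        /* ... record the event ... */

        trace_clear_recursion(bit);     /* no-op for bit == 0, else clears bit - 1 */
}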

View File

@@ -584,7 +584,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
{
struct synth_field *field;
const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
int len, ret = 0;
int len, ret = -ENOMEM;
struct seq_buf s;
ssize_t size;
@@ -617,10 +617,9 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len--;
field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
if (!field->name) {
ret = -ENOMEM;
if (!field->name)
goto free;
}
if (!is_good_name(field->name)) {
synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name));
ret = -EINVAL;
@@ -638,10 +637,9 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len += strlen(prefix);
field->type = kzalloc(len, GFP_KERNEL);
if (!field->type) {
ret = -ENOMEM;
if (!field->type)
goto free;
}
seq_buf_init(&s, field->type, len);
if (prefix)
seq_buf_puts(&s, prefix);
@@ -653,6 +651,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
}
if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
goto free;
s.buffer[s.len] = '\0';
size = synth_field_size(field->type);
@@ -666,10 +665,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len = sizeof("__data_loc ") + strlen(field->type) + 1;
type = kzalloc(len, GFP_KERNEL);
if (!type) {
ret = -ENOMEM;
if (!type)
goto free;
}
seq_buf_init(&s, type, len);
seq_buf_puts(&s, "__data_loc ");
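The hunks above all follow the same shape: default ret to -ENOMEM once, so every allocation failure is a bare goto, and only the uncommon errors assign ret explicitly. A condensed, hedged sketch of that pattern (the helper name and fields are illustrative):

static int example_build_name(struct synth_field *field, const char *name)
{
        int ret = -ENOMEM;                      /* common failure, set once */

        field->name = kstrdup(name, GFP_KERNEL);
        if (!field->name)
                goto free;                      /* no per-site "ret = -ENOMEM" */

        if (!is_good_name(field->name)) {
                ret = -EINVAL;                  /* uncommon errors still set ret */
                goto free;
        }
        return 0;

 free:
        kfree(field->name);
        return ret;
}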

View File

@@ -492,8 +492,13 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_rec_probe);
ret = -1;
if (trace_selftest_recursion_cnt != 1) {
pr_cont("*callback not called once (%d)* ",
/*
* Recursion allows for transitions between contexts,
* and may call the callback twice.
*/
if (trace_selftest_recursion_cnt != 1 &&
trace_selftest_recursion_cnt != 2) {
pr_cont("*callback not called once (or twice) (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}

View File

@@ -50,7 +50,7 @@ static bool ok_to_free_tracepoints;
*/
struct tp_probes {
struct rcu_head rcu;
struct tracepoint_func probes[0];
struct tracepoint_func probes[];
};
static inline void *allocate_probes(int count)
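Converting probes[0] to a flexible array member lets the allocation use the overflow-checked struct_size() idiom; a hedged sketch follows (the file's own allocate_probes() may compute the size differently):

static struct tp_probes *example_alloc_probes(int count)
{
        struct tp_probes *p;

        /* header plus "count" trailing tracepoint_func entries */
        p = kmalloc(struct_size(p, probes, count), GFP_KERNEL);
        return p;
}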

View File

@@ -44,8 +44,6 @@ int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly nmi_watchdog_available;
static struct cpumask watchdog_allowed_mask __read_mostly;
struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -162,6 +160,8 @@ static void lockup_detector_update_enable(void)
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif
static struct cpumask watchdog_allowed_mask __read_mostly;
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;