Merge branch 'linus' into sched/core, to resolve semantic conflict
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-y := core.o
|
||||
CFLAGS_core.o += $(call cc-disable-warning, override-init)
|
||||
ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
|
||||
# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
|
||||
cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
|
||||
endif
|
||||
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
|
||||
|
||||
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/bpf_verifier.h>
|
||||
#include <net/bpf_sk_storage.h>
|
||||
#include <linux/bpf_local_storage.h>
|
||||
#include <linux/btf_ids.h>
|
||||
|
||||
/* For every LSM hook that allows attachment of BPF programs, declare a nop
|
||||
* function where a BPF program can be attached.
|
||||
@@ -26,7 +27,11 @@ noinline RET bpf_lsm_##NAME(__VA_ARGS__) \
|
||||
#include <linux/lsm_hook_defs.h>
|
||||
#undef LSM_HOOK
|
||||
|
||||
#define BPF_LSM_SYM_PREFX "bpf_lsm_"
|
||||
#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
|
||||
BTF_SET_START(bpf_lsm_hooks)
|
||||
#include <linux/lsm_hook_defs.h>
|
||||
#undef LSM_HOOK
|
||||
BTF_SET_END(bpf_lsm_hooks)
|
||||
|
||||
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
|
||||
const struct bpf_prog *prog)
|
||||
@@ -37,8 +42,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name,
|
||||
sizeof(BPF_LSM_SYM_PREFX) - 1)) {
|
||||
if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
|
||||
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
|
||||
prog->aux->attach_btf_id, prog->aux->attach_func_name);
|
||||
return -EINVAL;
|
||||
|
||||
@@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
|
||||
*
|
||||
* Decode and execute eBPF instructions.
|
||||
*/
|
||||
static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
|
||||
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
|
||||
{
|
||||
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
|
||||
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
|
||||
|
||||
@@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
|
||||
}
|
||||
}
|
||||
|
||||
static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
|
||||
void *value, bool onallcpus)
|
||||
{
|
||||
/* When using prealloc and not setting the initial value on all cpus,
|
||||
* zero-fill element values for other cpus (just as what happens when
|
||||
* not using prealloc). Otherwise, bpf program has no way to ensure
|
||||
* known initial values for cpus other than current one
|
||||
* (onallcpus=false always when coming from bpf prog).
|
||||
*/
|
||||
if (htab_is_prealloc(htab) && !onallcpus) {
|
||||
u32 size = round_up(htab->map.value_size, 8);
|
||||
int current_cpu = raw_smp_processor_id();
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu == current_cpu)
|
||||
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
|
||||
size);
|
||||
else
|
||||
memset(per_cpu_ptr(pptr, cpu), 0, size);
|
||||
}
|
||||
} else {
|
||||
pcpu_copy_value(htab, pptr, value, onallcpus);
|
||||
}
|
||||
}
|
||||
|
||||
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
|
||||
{
|
||||
return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
|
||||
@@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
}
|
||||
}
|
||||
|
||||
pcpu_copy_value(htab, pptr, value, onallcpus);
|
||||
pcpu_init_value(htab, pptr, value, onallcpus);
|
||||
|
||||
if (!prealloc)
|
||||
htab_elem_set_ptr(l_new, key_size, pptr);
|
||||
@@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
|
||||
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
|
||||
value, onallcpus);
|
||||
} else {
|
||||
pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
|
||||
pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
|
||||
value, onallcpus);
|
||||
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
|
||||
l_new = NULL;
|
||||
|
||||
@@ -6,6 +6,7 @@ config USERMODE_DRIVER
|
||||
menuconfig BPF_PRELOAD
|
||||
bool "Preload BPF file system with kernel specific program and map iterators"
|
||||
depends on BPF
|
||||
depends on BPF_SYSCALL
|
||||
# The dependency on !COMPILE_TEST prevents it from being enabled
|
||||
# in allmodconfig or allyesconfig configurations
|
||||
depends on !COMPILE_TEST
|
||||
|
||||
@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
|
||||
struct tnum range = tnum_range(0, 1);
|
||||
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
|
||||
int err;
|
||||
const bool is_subprog = env->cur_state->frame[0]->subprogno;
|
||||
|
||||
/* LSM and struct_ops func-ptr's return type could be "void" */
|
||||
if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
|
||||
if (!is_subprog &&
|
||||
(prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
|
||||
prog_type == BPF_PROG_TYPE_LSM) &&
|
||||
!prog->aux->attach_func_proto->type)
|
||||
return 0;
|
||||
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
reg = cur_regs(env) + BPF_REG_0;
|
||||
if (is_subprog) {
|
||||
if (reg->type != SCALAR_VALUE) {
|
||||
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
|
||||
reg_type_str[reg->type]);
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (prog_type) {
|
||||
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
|
||||
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
|
||||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
|
||||
return 0;
|
||||
}
|
||||
|
||||
reg = cur_regs(env) + BPF_REG_0;
|
||||
if (reg->type != SCALAR_VALUE) {
|
||||
verbose(env, "At program exit the register R0 is not a known value (%s)\n",
|
||||
reg_type_str[reg->type]);
|
||||
@@ -9572,12 +9583,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
|
||||
struct bpf_insn *insn,
|
||||
struct bpf_insn_aux_data *aux)
|
||||
{
|
||||
u32 datasec_id, type, id = insn->imm;
|
||||
const struct btf_var_secinfo *vsi;
|
||||
const struct btf_type *datasec;
|
||||
const struct btf_type *t;
|
||||
const char *sym_name;
|
||||
bool percpu = false;
|
||||
u32 type, id = insn->imm;
|
||||
s32 datasec_id;
|
||||
u64 addr;
|
||||
int i;
|
||||
|
||||
|
||||
@@ -229,6 +229,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
|
||||
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
|
||||
}
|
||||
io_tlb_index = 0;
|
||||
no_iotlb_memory = false;
|
||||
|
||||
if (verbose)
|
||||
swiotlb_print_info();
|
||||
@@ -260,9 +261,11 @@ swiotlb_init(int verbose)
|
||||
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
|
||||
return;
|
||||
|
||||
if (io_tlb_start)
|
||||
if (io_tlb_start) {
|
||||
memblock_free_early(io_tlb_start,
|
||||
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
|
||||
io_tlb_start = 0;
|
||||
}
|
||||
pr_warn("Cannot allocate buffer");
|
||||
no_iotlb_memory = true;
|
||||
}
|
||||
@@ -360,6 +363,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
|
||||
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
|
||||
}
|
||||
io_tlb_index = 0;
|
||||
no_iotlb_memory = false;
|
||||
|
||||
swiotlb_print_info();
|
||||
|
||||
@@ -441,14 +445,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
|
||||
}
|
||||
}
|
||||
|
||||
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
|
||||
dma_addr_t tbl_dma_addr,
|
||||
phys_addr_t orig_addr,
|
||||
size_t mapping_size,
|
||||
size_t alloc_size,
|
||||
enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
|
||||
size_t mapping_size, size_t alloc_size,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
|
||||
unsigned long flags;
|
||||
phys_addr_t tlb_addr;
|
||||
unsigned int nslots, stride, index, wrap;
|
||||
@@ -667,9 +668,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
|
||||
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
|
||||
swiotlb_force);
|
||||
|
||||
swiotlb_addr = swiotlb_tbl_map_single(dev,
|
||||
phys_to_dma_unencrypted(dev, io_tlb_start),
|
||||
paddr, size, size, dir, attrs);
|
||||
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
|
||||
attrs);
|
||||
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
|
||||
@@ -337,10 +337,10 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
||||
* already contains a warning when RCU is not watching, so no point
|
||||
* in having another one here.
|
||||
*/
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
instrumentation_begin();
|
||||
rcu_irq_enter_check_tick();
|
||||
/* Use the combo lockdep/tracing function */
|
||||
trace_hardirqs_off();
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -2312,9 +2312,6 @@ group_sched_out(struct perf_event *group_event,
|
||||
event_sched_out(event, cpuctx, ctx);
|
||||
|
||||
perf_pmu_enable(ctx->pmu);
|
||||
|
||||
if (group_event->attr.exclusive)
|
||||
cpuctx->exclusive = 0;
|
||||
}
|
||||
|
||||
#define DETACH_GROUP 0x01UL
|
||||
@@ -2583,11 +2580,8 @@ group_sched_in(struct perf_event *group_event,
|
||||
|
||||
pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
|
||||
|
||||
if (event_sched_in(group_event, cpuctx, ctx)) {
|
||||
pmu->cancel_txn(pmu);
|
||||
perf_mux_hrtimer_restart(cpuctx);
|
||||
return -EAGAIN;
|
||||
}
|
||||
if (event_sched_in(group_event, cpuctx, ctx))
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* Schedule in siblings as one group (if any):
|
||||
@@ -2616,10 +2610,8 @@ group_error:
|
||||
}
|
||||
event_sched_out(group_event, cpuctx, ctx);
|
||||
|
||||
error:
|
||||
pmu->cancel_txn(pmu);
|
||||
|
||||
perf_mux_hrtimer_restart(cpuctx);
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
@@ -2645,7 +2637,7 @@ static int group_can_go_on(struct perf_event *event,
|
||||
* If this group is exclusive and there are already
|
||||
* events on the CPU, it can't go on.
|
||||
*/
|
||||
if (event->attr.exclusive && cpuctx->active_oncpu)
|
||||
if (event->attr.exclusive && !list_empty(get_event_list(event)))
|
||||
return 0;
|
||||
/*
|
||||
* Otherwise, try to add it if all previous groups were able
|
||||
@@ -3679,6 +3671,7 @@ static int merge_sched_in(struct perf_event *event, void *data)
|
||||
|
||||
*can_add_hw = 0;
|
||||
ctx->rotate_necessary = 1;
|
||||
perf_mux_hrtimer_restart(cpuctx);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -6374,14 +6367,13 @@ perf_output_sample_regs(struct perf_output_handle *handle,
|
||||
}
|
||||
|
||||
static void perf_sample_regs_user(struct perf_regs *regs_user,
|
||||
struct pt_regs *regs,
|
||||
struct pt_regs *regs_user_copy)
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (user_mode(regs)) {
|
||||
regs_user->abi = perf_reg_abi(current);
|
||||
regs_user->regs = regs;
|
||||
} else if (!(current->flags & PF_KTHREAD)) {
|
||||
perf_get_regs_user(regs_user, regs, regs_user_copy);
|
||||
perf_get_regs_user(regs_user, regs);
|
||||
} else {
|
||||
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
|
||||
regs_user->regs = NULL;
|
||||
@@ -7083,8 +7075,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
}
|
||||
|
||||
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
|
||||
perf_sample_regs_user(&data->regs_user, regs,
|
||||
&data->regs_user_copy);
|
||||
perf_sample_regs_user(&data->regs_user, regs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_REGS_USER) {
|
||||
/* regs dump ABI info */
|
||||
@@ -7186,6 +7177,7 @@ __perf_event_output(struct perf_event *event,
|
||||
struct perf_sample_data *data,
|
||||
struct pt_regs *regs,
|
||||
int (*output_begin)(struct perf_output_handle *,
|
||||
struct perf_sample_data *,
|
||||
struct perf_event *,
|
||||
unsigned int))
|
||||
{
|
||||
@@ -7198,7 +7190,7 @@ __perf_event_output(struct perf_event *event,
|
||||
|
||||
perf_prepare_sample(&header, data, event, regs);
|
||||
|
||||
err = output_begin(&handle, event, header.size);
|
||||
err = output_begin(&handle, data, event, header.size);
|
||||
if (err)
|
||||
goto exit;
|
||||
|
||||
@@ -7264,7 +7256,7 @@ perf_event_read_event(struct perf_event *event,
|
||||
int ret;
|
||||
|
||||
perf_event_header__init_id(&read_event.header, &sample, event);
|
||||
ret = perf_output_begin(&handle, event, read_event.header.size);
|
||||
ret = perf_output_begin(&handle, &sample, event, read_event.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
@@ -7533,7 +7525,7 @@ static void perf_event_task_output(struct perf_event *event,
|
||||
|
||||
perf_event_header__init_id(&task_event->event_id.header, &sample, event);
|
||||
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
task_event->event_id.header.size);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -7636,7 +7628,7 @@ static void perf_event_comm_output(struct perf_event *event,
|
||||
return;
|
||||
|
||||
perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
comm_event->event_id.header.size);
|
||||
|
||||
if (ret)
|
||||
@@ -7736,7 +7728,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
|
||||
|
||||
perf_event_header__init_id(&namespaces_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
namespaces_event->event_id.header.size);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -7863,7 +7855,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data)
|
||||
|
||||
perf_event_header__init_id(&cgroup_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
cgroup_event->event_id.header.size);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -7989,7 +7981,7 @@ static void perf_event_mmap_output(struct perf_event *event,
|
||||
}
|
||||
|
||||
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
mmap_event->event_id.header.size);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -8299,7 +8291,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
|
||||
int ret;
|
||||
|
||||
perf_event_header__init_id(&rec.header, &sample, event);
|
||||
ret = perf_output_begin(&handle, event, rec.header.size);
|
||||
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
|
||||
|
||||
if (ret)
|
||||
return;
|
||||
@@ -8333,7 +8325,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
|
||||
|
||||
perf_event_header__init_id(&lost_samples_event.header, &sample, event);
|
||||
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
lost_samples_event.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
@@ -8388,7 +8380,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data)
|
||||
|
||||
perf_event_header__init_id(&se->event_id.header, &sample, event);
|
||||
|
||||
ret = perf_output_begin(&handle, event, se->event_id.header.size);
|
||||
ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
@@ -8463,7 +8455,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
|
||||
|
||||
perf_event_header__init_id(&throttle_event.header, &sample, event);
|
||||
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
throttle_event.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
@@ -8506,7 +8498,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data)
|
||||
|
||||
perf_event_header__init_id(&ksymbol_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
ksymbol_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
@@ -8596,7 +8588,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
|
||||
|
||||
perf_event_header__init_id(&bpf_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
ret = perf_output_begin(&handle, data, event,
|
||||
bpf_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
@@ -8705,7 +8697,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data)
|
||||
|
||||
perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
|
||||
|
||||
ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
|
||||
ret = perf_output_begin(&handle, &sample, event,
|
||||
text_poke_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
@@ -8786,7 +8779,7 @@ static void perf_log_itrace_start(struct perf_event *event)
|
||||
rec.tid = perf_event_tid(event, current);
|
||||
|
||||
perf_event_header__init_id(&rec.header, &sample, event);
|
||||
ret = perf_output_begin(&handle, event, rec.header.size);
|
||||
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
|
||||
|
||||
if (ret)
|
||||
return;
|
||||
@@ -10085,6 +10078,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
|
||||
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
|
||||
int fpos = token == IF_SRC_FILE ? 2 : 1;
|
||||
|
||||
kfree(filename);
|
||||
filename = match_strdup(&args[fpos]);
|
||||
if (!filename) {
|
||||
ret = -ENOMEM;
|
||||
@@ -10131,16 +10125,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
|
||||
*/
|
||||
ret = -EOPNOTSUPP;
|
||||
if (!event->ctx->task)
|
||||
goto fail_free_name;
|
||||
goto fail;
|
||||
|
||||
/* look up the path and grab its inode */
|
||||
ret = kern_path(filename, LOOKUP_FOLLOW,
|
||||
&filter->path);
|
||||
if (ret)
|
||||
goto fail_free_name;
|
||||
|
||||
kfree(filename);
|
||||
filename = NULL;
|
||||
goto fail;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!filter->path.dentry ||
|
||||
@@ -10160,13 +10151,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
|
||||
if (state != IF_STATE_ACTION)
|
||||
goto fail;
|
||||
|
||||
kfree(filename);
|
||||
kfree(orig);
|
||||
|
||||
return 0;
|
||||
|
||||
fail_free_name:
|
||||
kfree(filename);
|
||||
fail:
|
||||
kfree(filename);
|
||||
free_filters_list(filters);
|
||||
kfree(orig);
|
||||
|
||||
|
||||
@@ -205,16 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
|
||||
|
||||
static inline int get_recursion_context(int *recursion)
|
||||
{
|
||||
int rctx;
|
||||
unsigned int pc = preempt_count();
|
||||
unsigned char rctx = 0;
|
||||
|
||||
if (unlikely(in_nmi()))
|
||||
rctx = 3;
|
||||
else if (in_irq())
|
||||
rctx = 2;
|
||||
else if (in_softirq())
|
||||
rctx = 1;
|
||||
else
|
||||
rctx = 0;
|
||||
rctx += !!(pc & (NMI_MASK));
|
||||
rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
|
||||
rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
|
||||
|
||||
if (recursion[rctx])
|
||||
return -1;
|
||||
|
||||
@@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
|
||||
|
||||
static __always_inline int
|
||||
__perf_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event, unsigned int size,
|
||||
bool backward)
|
||||
{
|
||||
@@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle,
|
||||
handle->size = (1UL << page_shift) - offset;
|
||||
|
||||
if (unlikely(have_lost)) {
|
||||
struct perf_sample_data sample_data;
|
||||
|
||||
lost_event.header.size = sizeof(lost_event);
|
||||
lost_event.header.type = PERF_RECORD_LOST;
|
||||
lost_event.header.misc = 0;
|
||||
lost_event.id = event->id;
|
||||
lost_event.lost = local_xchg(&rb->lost, 0);
|
||||
|
||||
perf_event_header__init_id(&lost_event.header,
|
||||
&sample_data, event);
|
||||
/* XXX mostly redundant; @data is already fully initializes */
|
||||
perf_event_header__init_id(&lost_event.header, data, event);
|
||||
perf_output_put(handle, lost_event);
|
||||
perf_event__output_id_sample(event, handle, &sample_data);
|
||||
perf_event__output_id_sample(event, handle, data);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -263,22 +262,25 @@ out:
|
||||
}
|
||||
|
||||
int perf_output_begin_forward(struct perf_output_handle *handle,
|
||||
struct perf_event *event, unsigned int size)
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event, unsigned int size)
|
||||
{
|
||||
return __perf_output_begin(handle, event, size, false);
|
||||
return __perf_output_begin(handle, data, event, size, false);
|
||||
}
|
||||
|
||||
int perf_output_begin_backward(struct perf_output_handle *handle,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event, unsigned int size)
|
||||
{
|
||||
return __perf_output_begin(handle, event, size, true);
|
||||
return __perf_output_begin(handle, data, event, size, true);
|
||||
}
|
||||
|
||||
int perf_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event, unsigned int size)
|
||||
{
|
||||
|
||||
return __perf_output_begin(handle, event, size,
|
||||
return __perf_output_begin(handle, data, event, size,
|
||||
unlikely(is_write_backward(event)));
|
||||
}
|
||||
|
||||
|
||||
@@ -454,7 +454,10 @@ static void exit_mm(void)
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
self.task = current;
|
||||
self.next = xchg(&core_state->dumper.next, &self);
|
||||
if (self.task->flags & PF_SIGNALED)
|
||||
self.next = xchg(&core_state->dumper.next, &self);
|
||||
else
|
||||
self.task = NULL;
|
||||
/*
|
||||
* Implies mb(), the result of xchg() must be visible
|
||||
* to core_state->dumper.
|
||||
|
||||
@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
|
||||
|
||||
if (copy_from_user(buf, buffer, count)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
goto out_free;
|
||||
}
|
||||
buf[count] = '\0';
|
||||
sym = strstrip(buf);
|
||||
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
|
||||
ret = count;
|
||||
}
|
||||
out:
|
||||
kfree(buf);
|
||||
mutex_unlock(&fei_lock);
|
||||
out_free:
|
||||
kfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -2167,14 +2167,9 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
/* ok, now we should be set up.. */
|
||||
p->pid = pid_nr(pid);
|
||||
if (clone_flags & CLONE_THREAD) {
|
||||
p->exit_signal = -1;
|
||||
p->group_leader = current->group_leader;
|
||||
p->tgid = current->tgid;
|
||||
} else {
|
||||
if (clone_flags & CLONE_PARENT)
|
||||
p->exit_signal = current->group_leader->exit_signal;
|
||||
else
|
||||
p->exit_signal = args->exit_signal;
|
||||
p->group_leader = p;
|
||||
p->tgid = p->pid;
|
||||
}
|
||||
@@ -2218,9 +2213,14 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
|
||||
p->real_parent = current->real_parent;
|
||||
p->parent_exec_id = current->parent_exec_id;
|
||||
if (clone_flags & CLONE_THREAD)
|
||||
p->exit_signal = -1;
|
||||
else
|
||||
p->exit_signal = current->group_leader->exit_signal;
|
||||
} else {
|
||||
p->real_parent = current;
|
||||
p->parent_exec_id = current->self_exec_id;
|
||||
p->exit_signal = args->exit_signal;
|
||||
}
|
||||
|
||||
klp_copy_process(p);
|
||||
|
||||
@@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi_state *pi_state)
|
||||
*/
|
||||
if (pi_state->owner) {
|
||||
struct task_struct *owner;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
|
||||
owner = pi_state->owner;
|
||||
if (owner) {
|
||||
raw_spin_lock(&owner->pi_lock);
|
||||
@@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
|
||||
raw_spin_unlock(&owner->pi_lock);
|
||||
}
|
||||
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
|
||||
}
|
||||
|
||||
if (current->pi_state_cache) {
|
||||
@@ -1503,8 +1504,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
|
||||
*/
|
||||
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
|
||||
|
||||
if (unlikely(should_fail_futex(true)))
|
||||
if (unlikely(should_fail_futex(true))) {
|
||||
ret = -EFAULT;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
|
||||
if (!ret && (curval != uval)) {
|
||||
@@ -2378,10 +2381,22 @@ retry:
|
||||
}
|
||||
|
||||
/*
|
||||
* Since we just failed the trylock; there must be an owner.
|
||||
* The trylock just failed, so either there is an owner or
|
||||
* there is a higher priority waiter than this one.
|
||||
*/
|
||||
newowner = rt_mutex_owner(&pi_state->pi_mutex);
|
||||
BUG_ON(!newowner);
|
||||
/*
|
||||
* If the higher priority waiter has not yet taken over the
|
||||
* rtmutex then newowner is NULL. We can't return here with
|
||||
* that state because it's inconsistent vs. the user space
|
||||
* state. So drop the locks and try again. It's a valid
|
||||
* situation and not any different from the other retry
|
||||
* conditions.
|
||||
*/
|
||||
if (unlikely(!newowner)) {
|
||||
err = -EAGAIN;
|
||||
goto handle_err;
|
||||
}
|
||||
} else {
|
||||
WARN_ON_ONCE(argowner != current);
|
||||
if (oldowner == current) {
|
||||
|
||||
@@ -225,8 +225,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
|
||||
* Process updating of timeout sysctl
|
||||
*/
|
||||
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
|
||||
void __user *buffer,
|
||||
size_t *lenp, loff_t *ppos)
|
||||
void *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
||||
@@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
|
||||
# Generic IRQ IPI support
|
||||
config GENERIC_IRQ_IPI
|
||||
bool
|
||||
select IRQ_DOMAIN_HIERARCHY
|
||||
|
||||
# Generic MSI interrupt support
|
||||
config GENERIC_MSI_IRQ
|
||||
|
||||
@@ -1249,7 +1249,13 @@ __acquires(hlist_lock)
|
||||
|
||||
*head = &kretprobe_inst_table[hash];
|
||||
hlist_lock = kretprobe_table_lock_ptr(hash);
|
||||
raw_spin_lock_irqsave(hlist_lock, *flags);
|
||||
/*
|
||||
* Nested is a workaround that will soon not be needed.
|
||||
* There's other protections that make sure the same lock
|
||||
* is not taken on the same CPU that lockdep is unaware of.
|
||||
* Differentiate when it is taken in NMI context.
|
||||
*/
|
||||
raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_hash_lock);
|
||||
|
||||
@@ -1258,7 +1264,13 @@ static void kretprobe_table_lock(unsigned long hash,
|
||||
__acquires(hlist_lock)
|
||||
{
|
||||
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
|
||||
raw_spin_lock_irqsave(hlist_lock, *flags);
|
||||
/*
|
||||
* Nested is a workaround that will soon not be needed.
|
||||
* There's other protections that make sure the same lock
|
||||
* is not taken on the same CPU that lockdep is unaware of.
|
||||
* Differentiate when it is taken in NMI context.
|
||||
*/
|
||||
raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_table_lock);
|
||||
|
||||
@@ -2028,7 +2040,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
|
||||
/* TODO: consider to only swap the RA after the last pre_handler fired */
|
||||
hash = hash_ptr(current, KPROBE_HASH_BITS);
|
||||
raw_spin_lock_irqsave(&rp->lock, flags);
|
||||
/*
|
||||
* Nested is a workaround that will soon not be needed.
|
||||
* There's other protections that make sure the same lock
|
||||
* is not taken on the same CPU that lockdep is unaware of.
|
||||
*/
|
||||
raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
|
||||
if (!hlist_empty(&rp->free_instances)) {
|
||||
ri = hlist_entry(rp->free_instances.first,
|
||||
struct kretprobe_instance, hlist);
|
||||
@@ -2039,7 +2056,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
ri->task = current;
|
||||
|
||||
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
|
||||
raw_spin_lock_irqsave(&rp->lock, flags);
|
||||
raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
|
||||
hlist_add_head(&ri->hlist, &rp->free_instances);
|
||||
raw_spin_unlock_irqrestore(&rp->lock, flags);
|
||||
return 0;
|
||||
|
||||
@@ -897,7 +897,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
|
||||
/* Move the work from worker->delayed_work_list. */
|
||||
WARN_ON_ONCE(list_empty(&work->node));
|
||||
list_del_init(&work->node);
|
||||
kthread_insert_work(worker, work, &worker->work_list);
|
||||
if (!work->canceling)
|
||||
kthread_insert_work(worker, work, &worker->work_list);
|
||||
|
||||
raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
}
|
||||
|
||||
@@ -84,7 +84,7 @@ static inline bool lockdep_enabled(void)
|
||||
if (!debug_locks)
|
||||
return false;
|
||||
|
||||
if (raw_cpu_read(lockdep_recursion))
|
||||
if (this_cpu_read(lockdep_recursion))
|
||||
return false;
|
||||
|
||||
if (current->lockdep_recursion)
|
||||
@@ -108,19 +108,21 @@ static inline void lockdep_lock(void)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
|
||||
|
||||
__this_cpu_inc(lockdep_recursion);
|
||||
arch_spin_lock(&__lock);
|
||||
__owner = current;
|
||||
__this_cpu_inc(lockdep_recursion);
|
||||
}
|
||||
|
||||
static inline void lockdep_unlock(void)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
|
||||
|
||||
if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current))
|
||||
return;
|
||||
|
||||
__this_cpu_dec(lockdep_recursion);
|
||||
__owner = NULL;
|
||||
arch_spin_unlock(&__lock);
|
||||
__this_cpu_dec(lockdep_recursion);
|
||||
}
|
||||
|
||||
static inline bool lockdep_assert_locked(void)
|
||||
@@ -2765,7 +2767,9 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
|
||||
* (Note that this has to be done separately, because the graph cannot
|
||||
* detect such classes of deadlocks.)
|
||||
*
|
||||
* Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
|
||||
* Returns: 0 on deadlock detected, 1 on OK, 2 if another lock with the same
|
||||
* lock class is held but nest_lock is also held, i.e. we rely on the
|
||||
* nest_lock to avoid the deadlock.
|
||||
*/
|
||||
static int
|
||||
check_deadlock(struct task_struct *curr, struct held_lock *next)
|
||||
@@ -2788,7 +2792,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next)
|
||||
* lock class (i.e. read_lock(lock)+read_lock(lock)):
|
||||
*/
|
||||
if ((next->read == 2) && prev->read)
|
||||
return 2;
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We're holding the nest_lock, which serializes this lock's
|
||||
@@ -3592,16 +3596,13 @@ static int validate_chain(struct task_struct *curr,
|
||||
|
||||
if (!ret)
|
||||
return 0;
|
||||
/*
|
||||
* Mark recursive read, as we jump over it when
|
||||
* building dependencies (just like we jump over
|
||||
* trylock entries):
|
||||
*/
|
||||
if (ret == 2)
|
||||
hlock->read = 2;
|
||||
/*
|
||||
* Add dependency only if this lock is not the head
|
||||
* of the chain, and if it's not a secondary read-lock:
|
||||
* of the chain, and if the new lock introduces no more
|
||||
* lock dependency (because we already hold a lock with the
|
||||
* same lock class) nor deadlock (because the nest_lock
|
||||
* serializes nesting locks), see the comments for
|
||||
* check_deadlock().
|
||||
*/
|
||||
if (!chain_head && ret != 2) {
|
||||
if (!check_prevs_add(curr, hlock))
|
||||
@@ -4057,7 +4058,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
|
||||
if (unlikely(in_nmi()))
|
||||
return;
|
||||
|
||||
if (unlikely(__this_cpu_read(lockdep_recursion)))
|
||||
if (unlikely(this_cpu_read(lockdep_recursion)))
|
||||
return;
|
||||
|
||||
if (unlikely(lockdep_hardirqs_enabled())) {
|
||||
@@ -4126,7 +4127,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
|
||||
goto skip_checks;
|
||||
}
|
||||
|
||||
if (unlikely(__this_cpu_read(lockdep_recursion)))
|
||||
if (unlikely(this_cpu_read(lockdep_recursion)))
|
||||
return;
|
||||
|
||||
if (lockdep_hardirqs_enabled()) {
|
||||
@@ -4396,6 +4397,9 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
|
||||
if (unlikely(hlock_class(this)->usage_mask & new_mask))
|
||||
goto unlock;
|
||||
|
||||
if (!hlock_class(this)->usage_mask)
|
||||
debug_atomic_dec(nr_unused_locks);
|
||||
|
||||
hlock_class(this)->usage_mask |= new_mask;
|
||||
|
||||
if (new_bit < LOCK_TRACE_STATES) {
|
||||
@@ -4403,19 +4407,10 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (new_bit) {
|
||||
case 0 ... LOCK_USED-1:
|
||||
if (new_bit < LOCK_USED) {
|
||||
ret = mark_lock_irq(curr, this, new_bit);
|
||||
if (!ret)
|
||||
return 0;
|
||||
break;
|
||||
|
||||
case LOCK_USED:
|
||||
debug_atomic_dec(nr_unused_locks);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unlock:
|
||||
|
||||
@@ -605,7 +605,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
|
||||
panic("panic_on_warn set ...\n");
|
||||
}
|
||||
|
||||
dump_stack();
|
||||
if (!regs)
|
||||
dump_stack();
|
||||
|
||||
print_irqtrace_events(current);
|
||||
|
||||
|
||||
@@ -530,7 +530,7 @@ struct module_param_attrs
|
||||
{
|
||||
unsigned int num;
|
||||
struct attribute_group grp;
|
||||
struct param_attribute attrs[0];
|
||||
struct param_attribute attrs[];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
|
||||
@@ -146,7 +146,7 @@ int freeze_processes(void)
|
||||
BUG_ON(in_atomic());
|
||||
|
||||
/*
|
||||
* Now that the whole userspace is frozen we need to disbale
|
||||
* Now that the whole userspace is frozen we need to disable
|
||||
* the OOM killer to disallow any further interference with
|
||||
* killable tasks. There is no guarantee oom victims will
|
||||
* ever reach a point they go away we have to wait with a timeout.
|
||||
|
||||
@@ -345,7 +345,7 @@ DESC_ID((id) - DESCS_COUNT(desc_ring))
|
||||
*/
|
||||
struct prb_data_block {
|
||||
unsigned long id;
|
||||
char data[0];
|
||||
char data[];
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -264,17 +264,11 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns,
|
||||
unsigned int mode)
|
||||
static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (mode & PTRACE_MODE_NOAUDIT)
|
||||
ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT);
|
||||
else
|
||||
ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE);
|
||||
|
||||
return ret == 0;
|
||||
return ns_capable_noaudit(ns, CAP_SYS_PTRACE);
|
||||
return ns_capable(ns, CAP_SYS_PTRACE);
|
||||
}
|
||||
|
||||
/* Returns 0 on success, -errno on denial. */
|
||||
@@ -326,7 +320,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
|
||||
gid_eq(caller_gid, tcred->sgid) &&
|
||||
gid_eq(caller_gid, tcred->gid))
|
||||
goto ok;
|
||||
if (ptrace_has_cap(cred, tcred->user_ns, mode))
|
||||
if (ptrace_has_cap(tcred->user_ns, mode))
|
||||
goto ok;
|
||||
rcu_read_unlock();
|
||||
return -EPERM;
|
||||
@@ -345,7 +339,7 @@ ok:
|
||||
mm = task->mm;
|
||||
if (mm &&
|
||||
((get_dumpable(mm) != SUID_DUMP_USER) &&
|
||||
!ptrace_has_cap(cred, mm->user_ns, mode)))
|
||||
!ptrace_has_cap(mm->user_ns, mode)))
|
||||
return -EPERM;
|
||||
|
||||
return security_ptrace_access_check(task, mode);
|
||||
|
||||
@@ -409,7 +409,7 @@ bool rcu_eqs_special_set(int cpu)
|
||||
*
|
||||
* The caller must have disabled interrupts and must not be idle.
|
||||
*/
|
||||
void rcu_momentary_dyntick_idle(void)
|
||||
notrace void rcu_momentary_dyntick_idle(void)
|
||||
{
|
||||
int special;
|
||||
|
||||
@@ -4076,7 +4076,6 @@ void rcu_cpu_starting(unsigned int cpu)
|
||||
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
|
||||
* The outgoing function has no further need of RCU, so remove it from
|
||||
* the rcu_node tree's ->qsmaskinitnext bit masks.
|
||||
@@ -4116,6 +4115,7 @@ void rcu_report_dead(unsigned int cpu)
|
||||
rdp->cpu_started = false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
|
||||
* The outgoing CPU has just passed through the dying-idle state, and we
|
||||
* are being invoked from the CPU that was IPIed to continue the offline
|
||||
|
||||
@@ -249,13 +249,16 @@ static bool check_slow_task(struct task_struct *t, void *arg)
|
||||
|
||||
/*
|
||||
* Scan the current list of tasks blocked within RCU read-side critical
|
||||
* sections, printing out the tid of each.
|
||||
* sections, printing out the tid of each of the first few of them.
|
||||
*/
|
||||
static int rcu_print_task_stall(struct rcu_node *rnp)
|
||||
static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
|
||||
__releases(rnp->lock)
|
||||
{
|
||||
int i = 0;
|
||||
int ndetected = 0;
|
||||
struct rcu_stall_chk_rdr rscr;
|
||||
struct task_struct *t;
|
||||
struct task_struct *ts[8];
|
||||
|
||||
if (!rcu_preempt_blocked_readers_cgp(rnp))
|
||||
return 0;
|
||||
@@ -264,6 +267,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
|
||||
t = list_entry(rnp->gp_tasks->prev,
|
||||
struct task_struct, rcu_node_entry);
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
|
||||
get_task_struct(t);
|
||||
ts[i++] = t;
|
||||
if (i >= ARRAY_SIZE(ts))
|
||||
break;
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
for (i--; i; i--) {
|
||||
t = ts[i];
|
||||
if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
|
||||
pr_cont(" P%d", t->pid);
|
||||
else
|
||||
@@ -273,6 +284,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
|
||||
".q"[rscr.rs.b.need_qs],
|
||||
".e"[rscr.rs.b.exp_hint],
|
||||
".l"[rscr.on_blkd_list]);
|
||||
put_task_struct(t);
|
||||
ndetected++;
|
||||
}
|
||||
pr_cont("\n");
|
||||
@@ -293,8 +305,9 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
||||
* Because preemptible RCU does not exist, we never have to check for
|
||||
* tasks blocked within RCU read-side critical sections.
|
||||
*/
|
||||
static int rcu_print_task_stall(struct rcu_node *rnp)
|
||||
static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
|
||||
{
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return 0;
|
||||
}
|
||||
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
|
||||
@@ -472,7 +485,6 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
||||
pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
|
||||
rcu_for_each_leaf_node(rnp) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
ndetected += rcu_print_task_stall(rnp);
|
||||
if (rnp->qsmask != 0) {
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu)
|
||||
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
|
||||
@@ -480,7 +492,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
||||
ndetected++;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
|
||||
@@ -551,22 +551,22 @@ static int __init reboot_setup(char *str)
|
||||
break;
|
||||
|
||||
case 's':
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (isdigit(*(str+1))) {
|
||||
rc = kstrtoint(str+1, 0, &reboot_cpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
} else if (str[1] == 'm' && str[2] == 'p' &&
|
||||
isdigit(*(str+3))) {
|
||||
rc = kstrtoint(str+3, 0, &reboot_cpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
} else
|
||||
if (isdigit(*(str+1)))
|
||||
reboot_cpu = simple_strtoul(str+1, NULL, 0);
|
||||
else if (str[1] == 'm' && str[2] == 'p' &&
|
||||
isdigit(*(str+3)))
|
||||
reboot_cpu = simple_strtoul(str+3, NULL, 0);
|
||||
else
|
||||
*mode = REBOOT_SOFT;
|
||||
if (reboot_cpu >= num_possible_cpus()) {
|
||||
pr_err("Ignoring the CPU number in reboot= option. "
|
||||
"CPU %d exceeds possible cpu number %d\n",
|
||||
reboot_cpu, num_possible_cpus());
|
||||
reboot_cpu = 0;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'g':
|
||||
*mode = REBOOT_GPIO;
|
||||
break;
|
||||
|
||||
@@ -2976,7 +2976,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
|
||||
#ifdef CONFIG_SMP
|
||||
if (wake_flags & WF_MIGRATED)
|
||||
en_flags |= ENQUEUE_MIGRATED;
|
||||
else
|
||||
#endif
|
||||
if (p->in_iowait) {
|
||||
delayacct_blkio_end(p);
|
||||
atomic_dec(&task_rq(p)->nr_iowait);
|
||||
}
|
||||
|
||||
activate_task(rq, p, en_flags);
|
||||
ttwu_do_wakeup(rq, p, wake_flags, rf);
|
||||
@@ -3363,11 +3368,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
|
||||
goto unlock;
|
||||
|
||||
if (p->in_iowait) {
|
||||
delayacct_blkio_end(p);
|
||||
atomic_dec(&task_rq(p)->nr_iowait);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
|
||||
@@ -3438,6 +3438,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
|
||||
cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
|
||||
if (task_cpu(p) != cpu) {
|
||||
if (p->in_iowait) {
|
||||
delayacct_blkio_end(p);
|
||||
atomic_dec(&task_rq(p)->nr_iowait);
|
||||
}
|
||||
|
||||
wake_flags |= WF_MIGRATED;
|
||||
psi_ttwu_dequeue(p);
|
||||
set_task_cpu(p, cpu);
|
||||
@@ -5432,20 +5437,21 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
|
||||
if (!dl_prio(p->normal_prio) ||
|
||||
(pi_task && dl_prio(pi_task->prio) &&
|
||||
dl_entity_preempt(&pi_task->dl, &p->dl))) {
|
||||
p->dl.dl_boosted = 1;
|
||||
p->dl.pi_se = pi_task->dl.pi_se;
|
||||
queue_flag |= ENQUEUE_REPLENISH;
|
||||
} else
|
||||
p->dl.dl_boosted = 0;
|
||||
} else {
|
||||
p->dl.pi_se = &p->dl;
|
||||
}
|
||||
p->sched_class = &dl_sched_class;
|
||||
} else if (rt_prio(prio)) {
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.dl_boosted = 0;
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (oldprio < prio)
|
||||
queue_flag |= ENQUEUE_HEAD;
|
||||
p->sched_class = &rt_sched_class;
|
||||
} else {
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.dl_boosted = 0;
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (rt_prio(oldprio))
|
||||
p->rt.timeout = 0;
|
||||
p->sched_class = &fair_sched_class;
|
||||
|
||||
@@ -102,8 +102,12 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
|
||||
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
|
||||
unsigned int next_freq)
|
||||
{
|
||||
if (sg_policy->next_freq == next_freq)
|
||||
return false;
|
||||
if (!sg_policy->need_freq_update) {
|
||||
if (sg_policy->next_freq == next_freq)
|
||||
return false;
|
||||
} else {
|
||||
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
|
||||
}
|
||||
|
||||
sg_policy->next_freq = next_freq;
|
||||
sg_policy->last_freq_update_time = time;
|
||||
@@ -164,7 +168,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
|
||||
if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
|
||||
return sg_policy->next_freq;
|
||||
|
||||
sg_policy->need_freq_update = false;
|
||||
sg_policy->cached_raw_freq = freq;
|
||||
return cpufreq_driver_resolve_freq(policy, freq);
|
||||
}
|
||||
@@ -440,7 +443,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
|
||||
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
|
||||
unsigned long util, max;
|
||||
unsigned int next_f;
|
||||
bool busy;
|
||||
unsigned int cached_freq = sg_policy->cached_raw_freq;
|
||||
|
||||
sugov_iowait_boost(sg_cpu, time, flags);
|
||||
@@ -451,9 +453,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
|
||||
if (!sugov_should_update_freq(sg_policy, time))
|
||||
return;
|
||||
|
||||
/* Limits may have changed, don't skip frequency update */
|
||||
busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
|
||||
|
||||
util = sugov_get_util(sg_cpu);
|
||||
max = sg_cpu->max;
|
||||
util = sugov_iowait_apply(sg_cpu, time, util, max);
|
||||
@@ -462,7 +461,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
|
||||
* Do not reduce the frequency if the CPU has not been idle
|
||||
* recently, as the reduction is likely to be premature then.
|
||||
*/
|
||||
if (busy && next_f < sg_policy->next_freq) {
|
||||
if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
|
||||
next_f = sg_policy->next_freq;
|
||||
|
||||
/* Restore cached freq as next_freq has changed */
|
||||
@@ -827,9 +826,10 @@ static int sugov_start(struct cpufreq_policy *policy)
|
||||
sg_policy->next_freq = 0;
|
||||
sg_policy->work_in_progress = false;
|
||||
sg_policy->limits_changed = false;
|
||||
sg_policy->need_freq_update = false;
|
||||
sg_policy->cached_raw_freq = 0;
|
||||
|
||||
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
|
||||
|
||||
for_each_cpu(cpu, policy->cpus) {
|
||||
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
|
||||
|
||||
@@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
|
||||
struct cpufreq_governor schedutil_gov = {
|
||||
.name = "schedutil",
|
||||
.owner = THIS_MODULE,
|
||||
.dynamic_switching = true,
|
||||
.flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
|
||||
.init = sugov_init,
|
||||
.exit = sugov_exit,
|
||||
.start = sugov_start,
|
||||
|
||||
@@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
|
||||
return !RB_EMPTY_NODE(&dl_se->rb_node);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return dl_se->pi_se;
|
||||
}
|
||||
|
||||
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return pi_of(dl_se) != dl_se;
|
||||
}
|
||||
#else
|
||||
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return dl_se;
|
||||
}
|
||||
|
||||
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline struct dl_bw *dl_bw_of(int i)
|
||||
{
|
||||
@@ -714,7 +736,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
WARN_ON(dl_se->dl_boosted);
|
||||
WARN_ON(is_dl_boosted(dl_se));
|
||||
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
|
||||
|
||||
/*
|
||||
@@ -752,21 +774,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
|
||||
* could happen are, typically, a entity voluntarily trying to overcome its
|
||||
* runtime, or it just underestimated it during sched_setattr().
|
||||
*/
|
||||
static void replenish_dl_entity(struct sched_dl_entity *dl_se,
|
||||
struct sched_dl_entity *pi_se)
|
||||
static void replenish_dl_entity(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
BUG_ON(pi_se->dl_runtime <= 0);
|
||||
BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
|
||||
|
||||
/*
|
||||
* This could be the case for a !-dl task that is boosted.
|
||||
* Just go with full inherited parameters.
|
||||
*/
|
||||
if (dl_se->dl_deadline == 0) {
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
dl_se->runtime = pi_se->dl_runtime;
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
}
|
||||
|
||||
if (dl_se->dl_yielded && dl_se->runtime > 0)
|
||||
@@ -779,8 +800,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
|
||||
* arbitrary large.
|
||||
*/
|
||||
while (dl_se->runtime <= 0) {
|
||||
dl_se->deadline += pi_se->dl_period;
|
||||
dl_se->runtime += pi_se->dl_runtime;
|
||||
dl_se->deadline += pi_of(dl_se)->dl_period;
|
||||
dl_se->runtime += pi_of(dl_se)->dl_runtime;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -794,8 +815,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
|
||||
*/
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
|
||||
printk_deferred_once("sched: DL replenish lagged too much\n");
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
dl_se->runtime = pi_se->dl_runtime;
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
}
|
||||
|
||||
if (dl_se->dl_yielded)
|
||||
@@ -828,8 +849,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
|
||||
* task with deadline equal to period this is the same of using
|
||||
* dl_period instead of dl_deadline in the equation above.
|
||||
*/
|
||||
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
|
||||
struct sched_dl_entity *pi_se, u64 t)
|
||||
static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
|
||||
{
|
||||
u64 left, right;
|
||||
|
||||
@@ -851,9 +871,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
|
||||
* of anything below microseconds resolution is actually fiction
|
||||
* (but still we want to give the user that illusion >;).
|
||||
*/
|
||||
left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
|
||||
left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
|
||||
right = ((dl_se->deadline - t) >> DL_SCALE) *
|
||||
(pi_se->dl_runtime >> DL_SCALE);
|
||||
(pi_of(dl_se)->dl_runtime >> DL_SCALE);
|
||||
|
||||
return dl_time_before(right, left);
|
||||
}
|
||||
@@ -938,24 +958,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
|
||||
* Please refer to the comments update_dl_revised_wakeup() function to find
|
||||
* more about the Revised CBS rule.
|
||||
*/
|
||||
static void update_dl_entity(struct sched_dl_entity *dl_se,
|
||||
struct sched_dl_entity *pi_se)
|
||||
static void update_dl_entity(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
|
||||
dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
|
||||
dl_entity_overflow(dl_se, rq_clock(rq))) {
|
||||
|
||||
if (unlikely(!dl_is_implicit(dl_se) &&
|
||||
!dl_time_before(dl_se->deadline, rq_clock(rq)) &&
|
||||
!dl_se->dl_boosted)){
|
||||
!is_dl_boosted(dl_se))) {
|
||||
update_dl_revised_wakeup(dl_se, rq);
|
||||
return;
|
||||
}
|
||||
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
dl_se->runtime = pi_se->dl_runtime;
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1054,7 +1073,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
||||
* The task might have been boosted by someone else and might be in the
|
||||
* boosting/deboosting path, its not throttled.
|
||||
*/
|
||||
if (dl_se->dl_boosted)
|
||||
if (is_dl_boosted(dl_se))
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
@@ -1082,7 +1101,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
||||
* but do not enqueue -- wait for our wakeup to do that.
|
||||
*/
|
||||
if (!task_on_rq_queued(p)) {
|
||||
replenish_dl_entity(dl_se, dl_se);
|
||||
replenish_dl_entity(dl_se);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
@@ -1172,7 +1191,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
|
||||
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
|
||||
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
|
||||
if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
|
||||
if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
|
||||
return;
|
||||
dl_se->dl_throttled = 1;
|
||||
if (dl_se->runtime > 0)
|
||||
@@ -1303,7 +1322,7 @@ throttle:
|
||||
dl_se->dl_overrun = 1;
|
||||
|
||||
__dequeue_task_dl(rq, curr, 0);
|
||||
if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
|
||||
if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
|
||||
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
|
||||
|
||||
if (!is_leftmost(curr, &rq->dl))
|
||||
@@ -1500,8 +1519,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue_dl_entity(struct sched_dl_entity *dl_se,
|
||||
struct sched_dl_entity *pi_se, int flags)
|
||||
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
|
||||
{
|
||||
BUG_ON(on_dl_rq(dl_se));
|
||||
|
||||
@@ -1512,9 +1530,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
|
||||
*/
|
||||
if (flags & ENQUEUE_WAKEUP) {
|
||||
task_contending(dl_se, flags);
|
||||
update_dl_entity(dl_se, pi_se);
|
||||
update_dl_entity(dl_se);
|
||||
} else if (flags & ENQUEUE_REPLENISH) {
|
||||
replenish_dl_entity(dl_se, pi_se);
|
||||
replenish_dl_entity(dl_se);
|
||||
} else if ((flags & ENQUEUE_RESTORE) &&
|
||||
dl_time_before(dl_se->deadline,
|
||||
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
|
||||
@@ -1531,19 +1549,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
|
||||
|
||||
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
struct task_struct *pi_task = rt_mutex_get_top_task(p);
|
||||
struct sched_dl_entity *pi_se = &p->dl;
|
||||
|
||||
/*
|
||||
* Use the scheduling parameters of the top pi-waiter task if:
|
||||
* - we have a top pi-waiter which is a SCHED_DEADLINE task AND
|
||||
* - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
|
||||
* smaller than our deadline OR we are a !SCHED_DEADLINE task getting
|
||||
* boosted due to a SCHED_DEADLINE pi-waiter).
|
||||
* Otherwise we keep our runtime and deadline.
|
||||
*/
|
||||
if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
|
||||
pi_se = &pi_task->dl;
|
||||
if (is_dl_boosted(&p->dl)) {
|
||||
/*
|
||||
* Because of delays in the detection of the overrun of a
|
||||
* thread's runtime, it might be the case that a thread
|
||||
@@ -1576,7 +1582,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
||||
* the throttle.
|
||||
*/
|
||||
p->dl.dl_throttled = 0;
|
||||
BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
|
||||
BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1613,7 +1619,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
||||
return;
|
||||
}
|
||||
|
||||
enqueue_dl_entity(&p->dl, pi_se, flags);
|
||||
enqueue_dl_entity(&p->dl, flags);
|
||||
|
||||
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
|
||||
enqueue_pushable_dl_task(rq, p);
|
||||
@@ -2832,11 +2838,14 @@ void __dl_clear_params(struct task_struct *p)
|
||||
dl_se->dl_bw = 0;
|
||||
dl_se->dl_density = 0;
|
||||
|
||||
dl_se->dl_boosted = 0;
|
||||
dl_se->dl_throttled = 0;
|
||||
dl_se->dl_yielded = 0;
|
||||
dl_se->dl_non_contending = 0;
|
||||
dl_se->dl_overrun = 0;
|
||||
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
dl_se->pi_se = dl_se;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)

@@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
unsigned long flags = *(unsigned long *)table->data;
size_t data_size = 0;
size_t len = 0;
char *tmp;
char *tmp, *buf;
int idx;

if (write)
@@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
return 0;
}

tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
if (!tmp)
buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;

for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
char *name = sd_flag_debug[idx].name;

len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
}

tmp += *ppos;
tmp = buf + *ppos;
len -= *ppos;

if (len > *lenp)
@@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
*lenp = len;
*ppos += len;

kfree(tmp);
kfree(buf);

return 0;
}
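The fix above keeps the kcalloc() result in buf and walks the string through a separate cursor, so the later kfree() is handed the pointer that was actually allocated; the old code advanced tmp by *ppos and then freed the advanced pointer. A minimal userspace rendering of the same pattern, with illustrative names:

#include <stdlib.h>
#include <string.h>

/* Copy the tail of 'src' starting at 'off' into 'dst' (dst_len > 0),
 * going through a temporary heap buffer the way sd_ctl_doflags() goes
 * through its flag-name string.
 */
static size_t copy_tail(char *dst, size_t dst_len, const char *src, size_t off)
{
	size_t len = strlen(src);
	char *buf = calloc(len + 1, 1);	/* the allocation lives in 'buf'    */
	const char *cur;
	size_t n;

	if (!buf || off > len) {
		free(buf);
		return 0;
	}
	memcpy(buf, src, len + 1);

	cur = buf + off;		/* separate cursor, 'buf' untouched */
	n = strnlen(cur, dst_len - 1);
	memcpy(dst, cur, n);
	dst[n] = '\0';

	free(buf);			/* always the original pointer      */
	return n;
}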

@@ -5497,6 +5497,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
int idle_h_nr_running = task_has_idle_policy(p);
int task_new = !(flags & ENQUEUE_WAKEUP);

/*
* The code below (indirectly) updates schedutil which looks at
@@ -5569,7 +5570,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* into account, but that is not straightforward to implement,
* and the following generally works well enough in practice.
*/
if (flags & ENQUEUE_WAKEUP)
if (!task_new)
update_overutilized_status(rq);

enqueue_throttle:
@@ -6195,21 +6196,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
static int
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
{
unsigned long best_cap = 0;
unsigned long task_util, best_cap = 0;
int cpu, best_cpu = -1;
struct cpumask *cpus;

sync_entity_load_avg(&p->se);

cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);

task_util = uclamp_task_util(p);

for_each_cpu_wrap(cpu, cpus, target) {
unsigned long cpu_cap = capacity_of(cpu);

if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
continue;
if (task_fits_capacity(p, cpu_cap))
if (fits_capacity(task_util, cpu_cap))
return cpu;

if (cpu_cap > best_cap) {
@@ -6221,44 +6222,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
return best_cpu;
}

static inline bool asym_fits_capacity(int task_util, int cpu)
{
if (static_branch_unlikely(&sched_asym_cpucapacity))
return fits_capacity(task_util, capacity_of(cpu));

return true;
}
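asym_fits_capacity() above only does real work when the sched_asym_cpucapacity static key is enabled; on symmetric systems every CPU trivially "fits". Assuming the mainline fits_capacity() definition (a 1280/1024 margin, i.e. utilization must stay below roughly 80% of capacity), the gate that the following hunks thread through select_idle_sibling() boils down to this sketch:

/* Assumed definition; kernel/sched/fair.c uses a 1280/1024 margin. */
#define fits_capacity(cap, max)	((cap) * 1280 < (max) * 1024)

/* An idle candidate is only usable if the (uclamp-adjusted) task
 * utilization fits its capacity; on symmetric systems the check is a
 * constant true and the static branch keeps it off the fast path.
 */
static inline bool candidate_usable(unsigned long task_util,
				    unsigned long cpu_cap, bool asym)
{
	if (!asym)
		return true;

	return fits_capacity(task_util, cpu_cap);
}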

/*
* Try and locate an idle core/thread in the LLC cache domain.
*/
static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
unsigned long task_util;
int i, recent_used_cpu;

/*
* For asymmetric CPU capacity systems, our domain of interest is
* sd_asym_cpucapacity rather than sd_llc.
* On asymmetric system, update task utilization because we will check
* that the task fits with cpu's capacity.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
/*
* On an asymmetric CPU capacity system where an exclusive
* cpuset defines a symmetric island (i.e. one unique
* capacity_orig value through the cpuset), the key will be set
* but the CPUs within that cpuset will not have a domain with
* SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
* capacity path.
*/
if (!sd)
goto symmetric;

i = select_idle_capacity(p, sd, target);
return ((unsigned)i < nr_cpumask_bits) ? i : target;
sync_entity_load_avg(&p->se);
task_util = uclamp_task_util(p);
}

symmetric:
if (available_idle_cpu(target) || sched_idle_cpu(target))
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
asym_fits_capacity(task_util, target))
return target;

/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
if (prev != target && cpus_share_cache(prev, target) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
(available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
asym_fits_capacity(task_util, prev))
return prev;

/*
@@ -6281,7 +6280,8 @@ symmetric:
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
asym_fits_capacity(task_util, recent_used_cpu)) {
/*
* Replace recent_used_cpu with prev as it is a potential
* candidate for the next wake:
@@ -6290,6 +6290,26 @@ symmetric:
return recent_used_cpu;
}

/*
* For asymmetric CPU capacity systems, our domain of interest is
* sd_asym_cpucapacity rather than sd_llc.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
/*
* On an asymmetric CPU capacity system where an exclusive
* cpuset defines a symmetric island (i.e. one unique
* capacity_orig value through the cpuset), the key will be set
* but the CPUs within that cpuset will not have a domain with
* SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
* capacity path.
*/
if (sd) {
i = select_idle_capacity(p, sd, target);
return ((unsigned)i < nr_cpumask_bits) ? i : target;
}
}

sd = rcu_dereference(per_cpu(sd_llc, target));
if (!sd)
return target;
@@ -9070,7 +9090,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* emptying busiest.
*/
if (local->group_type == group_has_spare) {
if (busiest->group_type > group_fully_busy) {
if ((busiest->group_type > group_fully_busy) &&
!(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
/*
* If busiest is overloaded, try to fill spare
* capacity. This might end up creating spare capacity

@@ -38,7 +38,7 @@
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/capability.h>
#include <linux/tracehook.h>
#include <linux/uaccess.h>
#include <linux/anon_inodes.h>
@@ -558,8 +558,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
* behavior of privileged children.
*/
if (!task_no_new_privs(current) &&
security_capable(current_cred(), current_user_ns(),
CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0)
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);

/* Allocate a new seccomp_filter */

@@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task)

void task_join_group_stop(struct task_struct *task)
{
unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK;
struct signal_struct *sig = current->signal;

if (sig->group_stop_count) {
sig->group_stop_count++;
mask |= JOBCTL_STOP_CONSUME;
} else if (!(sig->flags & SIGNAL_STOP_STOPPED))
return;

/* Have the new thread join an on-going signal group stop */
unsigned long jobctl = current->jobctl;
if (jobctl & JOBCTL_STOP_PENDING) {
struct signal_struct *sig = current->signal;
unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
if (task_set_jobctl_pending(task, signr | gstop)) {
sig->group_stop_count++;
}
}
task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
}

/*

@@ -130,7 +130,7 @@ static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)

csd_type = CSD_TYPE(csd);
if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
return -1;
}

@@ -194,7 +194,7 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}

void __weak stop_machine_yield(const struct cpumask *cpumask)
notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
{
cpu_relax();
}

@@ -425,11 +425,6 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
debug_object_deactivate(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_free(struct hrtimer *timer)
{
debug_object_free(timer, &hrtimer_debug_descr);
}

static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode);

@@ -172,10 +172,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];

/*
* Use the to_ktime conversion because that clamps the maximum
* value to KTIME_MAX and avoid multiplication overflows.
*/
nval = timespec64_to_ns(&value->it_value);
ninterval = timespec64_to_ns(&value->it_interval);

@@ -68,13 +68,13 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
return (cyc * mult) >> shift;
}

struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
{
*seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
}

int sched_clock_read_retry(unsigned int seq)
notrace int sched_clock_read_retry(unsigned int seq)
{
return read_seqcount_latch_retry(&cd.seq, seq);
}

@@ -732,11 +732,6 @@ static inline void debug_timer_deactivate(struct timer_list *timer)
debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
debug_object_free(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
debug_object_assert_init(timer, &timer_debug_descr);

@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
{
int ret;

/*
* NB: We rely on strncpy_from_user() not copying junk past the NUL
* terminator into `dst`.
*
* strncpy_from_user() does long-sized strides in the fast path. If the
* strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
* then there could be junk after the NUL in `dst`. If user takes `dst`
* and keys a hash map with it, then semantically identical strings can
* occupy multiple entries in the map.
*/
ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
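The new comment above is about key stability: if bytes past the NUL are left untouched, two reads of the same string can produce buffers that differ after the terminator and therefore land in different hash-map entries. A tiny userspace illustration of why fixed-size keys care about those padding bytes:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Same C string "abc", different garbage after the terminator. */
	char a[8] = { 'a', 'b', 'c', '\0', 'X', 'X', 'X', 'X' };
	char b[8] = { 'a', 'b', 'c', '\0',  0,   0,   0,   0  };

	printf("same string: %d\n", strcmp(a, b) == 0);		/* 1 */
	printf("same map key: %d\n", memcmp(a, b, sizeof(a)) == 0);	/* 0 */
	return 0;
}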
@@ -1198,7 +1208,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
*btf = bpf_get_btf_vmlinux();

if (IS_ERR_OR_NULL(*btf))
return PTR_ERR(*btf);
return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

if (ptr->type_id > 0)
*btf_id = ptr->type_id;
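The one-liner above exists because PTR_ERR(NULL) evaluates to 0, so the old IS_ERR_OR_NULL() branch silently reported success when bpf_get_btf_vmlinux() returned NULL; the fix keeps PTR_ERR() for genuine error pointers and returns -EINVAL for NULL. A hedged userspace model of the two return paths (ptr_err() merely stands in for PTR_ERR()):

#include <stdio.h>

#define MY_EINVAL 22

static long ptr_err(const void *p)	/* models PTR_ERR(): value of the pointer as long */
{
	return (long)p;
}

int main(void)
{
	const void *btf = NULL;				/* the case being fixed   */

	long old_ret = ptr_err(btf);			/* 0, i.e. "success"      */
	long new_ret = btf ? ptr_err(btf) : -MY_EINVAL;	/* -22, a real error code */

	printf("old=%ld new=%ld\n", old_ret, new_ret);
	return 0;
}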

@@ -438,14 +438,16 @@ enum {
};
/*
* Used for which event context the event is in.
* NMI = 0
* IRQ = 1
* SOFTIRQ = 2
* NORMAL = 3
* TRANSITION = 0
* NMI = 1
* IRQ = 2
* SOFTIRQ = 3
* NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -3014,10 +3016,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
* bit 0 = NMI context
* bit 1 = IRQ context
* bit 2 = SoftIRQ context
* bit 3 = normal context.
* bit 1 = NMI context
* bit 2 = IRQ context
* bit 3 = SoftIRQ context
* bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3042,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
*
* Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
* is set when a recursion is detected at the current context, and if
* the TRANSITION bit is already set, it will fail the recursion.
* This is needed because there's a lag between the changing of
* interrupt context and updating the preempt count. In this case,
* a false positive will be found. To handle this, one extra recursion
* is allowed, and this is done by the TRANSITION bit. If the TRANSITION
* bit is already set, then it is considered a recursion and the function
* ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
*
* On the trace_recursive_unlock(), the TRANSITION bit will be the first
* to be cleared. Even if it wasn't the context that set it. That is,
* if an interrupt comes in while NORMAL bit is set and the ring buffer
* is called before preempt_count() is updated, since the check will
* be on the NORMAL bit, the TRANSITION bit will then be set. If an
* NMI then comes in, it will set the NMI bit, but when the NMI code
* does the trace_recursive_unlock() it will clear the TRANSITION bit
* and leave the NMI bit set. But this is fine, because the interrupt
* code that set the TRANSITION bit will then clear the NMI bit when it
* calls trace_recursive_unlock(). If another NMI comes in, it will
* set the TRANSITION bit and continue.
*
* Note: The TRANSITION bit only handles a single transition between context.
*/

static __always_inline int
@@ -3055,8 +3081,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = pc & NMI_MASK ? RB_CTX_NMI :
pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;

if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
return 1;
if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
/*
* It is possible that this was called by transitioning
* between interrupt context, and preempt_count() has not
* been updated yet. In this case, use the TRANSITION bit.
*/
bit = RB_CTX_TRANSITION;
if (val & (1 << (bit + cpu_buffer->nest)))
return 1;
}

val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;
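The reworked check above reserves one bit per context plus a TRANSITION bit that absorbs exactly one false positive caused by a context switch that preempt_count() has not reflected yet. A toy userspace model of that logic, using the new bit numbering (TRANSITION = 0 ... NORMAL = 4):

#include <stdio.h>

enum { CTX_TRANSITION, CTX_NMI, CTX_IRQ, CTX_SOFTIRQ, CTX_NORMAL };

/* Returns the bit that was taken, or -1 on a genuine recursion. */
static int try_lock(unsigned int *mask, int bit)
{
	if (*mask & (1u << bit)) {
		bit = CTX_TRANSITION;		/* one extra recursion allowed */
		if (*mask & (1u << bit))
			return -1;		/* genuine recursion: reject   */
	}
	*mask |= 1u << bit;
	return bit;
}

int main(void)
{
	unsigned int mask = 0;

	printf("%d\n", try_lock(&mask, CTX_NORMAL));	/* 4: normal bit      */
	printf("%d\n", try_lock(&mask, CTX_NORMAL));	/* 0: transition bit  */
	printf("%d\n", try_lock(&mask, CTX_NORMAL));	/* -1: rejected       */
	return 0;
}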
@@ -3071,8 +3105,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->current_context - (1 << cpu_buffer->nest);
}

/* The recursive locking above uses 4 bits */
#define NESTED_BITS 4
/* The recursive locking above uses 5 bits */
#define NESTED_BITS 5

/**
* ring_buffer_nest_start - Allow to trace while nested

@@ -2750,7 +2750,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
* to store the trace event for the tigger to use. It's recusive
* to store the trace event for the trigger to use. It's recursive
* safe and will not be recorded anywhere.
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
@@ -2952,7 +2952,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

/* This should never happen. If it does, yell once and skip */
if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
goto out;

/*
@@ -3132,7 +3132,7 @@ static char *get_trace_buf(void)

/* Interrupts must see nesting incremented before we use the buffer */
barrier();
return &buffer->buffer[buffer->nesting][0];
return &buffer->buffer[buffer->nesting - 1][0];
}

static void put_trace_buf(void)

@@ -637,6 +637,12 @@ enum {
* function is called to clear it.
*/
TRACE_GRAPH_NOTRACE_BIT,

/*
* When transitioning between context, the preempt_count() may
* not be correct. Allow for a single recursion to cover this case.
*/
TRACE_TRANSITION_BIT,
};

#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -691,14 +697,27 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
return 0;

bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit)))
return -1;
if (unlikely(val & (1 << bit))) {
/*
* It could be that preempt_count has not been updated during
* a switch between contexts. Allow for a single recursion.
*/
bit = TRACE_TRANSITION_BIT;
if (trace_recursion_test(bit))
return -1;
trace_recursion_set(bit);
barrier();
return bit + 1;
}

/* Normal check passed, clear the transition to allow it again */
trace_recursion_clear(TRACE_TRANSITION_BIT);

val |= 1 << bit;
current->trace_recursion = val;
barrier();

return bit;
return bit + 1;
}

static __always_inline void trace_clear_recursion(int bit)
@@ -708,6 +727,7 @@ static __always_inline void trace_clear_recursion(int bit)
if (!bit)
return;

bit--;
bit = 1 << bit;
val &= ~bit;
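The +1 added to the return value and the matching bit-- above keep the pair symmetric: 0 can now mean "nothing was set, nothing to clear". The presumed caller pattern is sketched below; the start/max constants and the surrounding function are illustrative, since the real call sites are outside this excerpt.

static void my_traced_callback(void)	/* hypothetical caller */
{
	int bit;

	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
	if (bit < 0)
		return;		/* recursion, even after the TRANSITION grace */

	/* ... do the traced work ... */

	trace_clear_recursion(bit);	/* bit-- inside pairs with the +1 return */
}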

@@ -584,7 +584,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
{
struct synth_field *field;
const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
int len, ret = 0;
int len, ret = -ENOMEM;
struct seq_buf s;
ssize_t size;

@@ -617,10 +617,9 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len--;

field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
if (!field->name) {
ret = -ENOMEM;
if (!field->name)
goto free;
}

if (!is_good_name(field->name)) {
synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name));
ret = -EINVAL;
@@ -638,10 +637,9 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len += strlen(prefix);

field->type = kzalloc(len, GFP_KERNEL);
if (!field->type) {
ret = -ENOMEM;
if (!field->type)
goto free;
}

seq_buf_init(&s, field->type, len);
if (prefix)
seq_buf_puts(&s, prefix);
@@ -653,6 +651,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
}
if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
goto free;

s.buffer[s.len] = '\0';

size = synth_field_size(field->type);
@@ -666,10 +665,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,

len = sizeof("__data_loc ") + strlen(field->type) + 1;
type = kzalloc(len, GFP_KERNEL);
if (!type) {
ret = -ENOMEM;
if (!type)
goto free;
}

seq_buf_init(&s, type, len);
seq_buf_puts(&s, "__data_loc ");
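The synthetic-event hunks above all make the same change: ret starts out as -ENOMEM, so every allocation failure can simply "goto free" and only non-default error codes are assigned explicitly. A condensed userspace model of that error-path shape (the names are illustrative, not the tracing code itself):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct field { char *name; char *type; };

/* Returns 0 on success, a negative errno otherwise. */
static int make_field(struct field *f, const char *name, size_t type_len)
{
	int ret = -ENOMEM;		/* default error, set once          */

	f->name = strdup(name);
	if (!f->name)
		goto free;		/* no per-site "ret = -ENOMEM"      */

	if (f->name[0] == '\0') {	/* stand-in for the name validation */
		ret = -EINVAL;		/* only non-default codes assigned  */
		goto free;
	}

	f->type = calloc(type_len, 1);
	if (!f->type)
		goto free;

	return 0;
free:
	free(f->name);
	f->name = NULL;
	return ret;
}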

@@ -492,8 +492,13 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_rec_probe);

ret = -1;
if (trace_selftest_recursion_cnt != 1) {
pr_cont("*callback not called once (%d)* ",
/*
* Recursion allows for transitions between context,
* and may call the callback twice.
*/
if (trace_selftest_recursion_cnt != 1 &&
trace_selftest_recursion_cnt != 2) {
pr_cont("*callback not called once (or twice) (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}

@@ -50,7 +50,7 @@ static bool ok_to_free_tracepoints;
*/
struct tp_probes {
struct rcu_head rcu;
struct tracepoint_func probes[0];
struct tracepoint_func probes[];
};

static inline void *allocate_probes(int count)
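The probes[0] -> probes[] change above is the flexible-array-member conversion; the natural companion on the allocation side is struct_size(), which sizes the header plus count trailing elements with overflow checking. A sketch of how such an object is allocated — allocate_probes() itself is outside this excerpt, so treat this as illustrative rather than a quote:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/tracepoint.h>

struct tp_probes_sketch {
	struct rcu_head rcu;
	struct tracepoint_func probes[];	/* C99 flexible array member */
};

static inline struct tracepoint_func *alloc_probes_sketch(int count)
{
	struct tp_probes_sketch *p;

	/* struct_size() == sizeof(*p) + count * sizeof(p->probes[0]),
	 * saturating instead of wrapping on overflow.
	 */
	p = kmalloc(struct_size(p, probes, count), GFP_KERNEL);

	return p ? p->probes : NULL;
}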

@@ -44,8 +44,6 @@ int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly nmi_watchdog_available;

static struct cpumask watchdog_allowed_mask __read_mostly;

struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

@@ -162,6 +160,8 @@ static void lockup_detector_update_enable(void)
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif

static struct cpumask watchdog_allowed_mask __read_mostly;

/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;