Merge branch 'ctxt.2022.07.05a' into HEAD
ctxt.2022.07.05a: Linux-kernel memory model development branch.
This commit is contained in:
commit
34bc7b454d
@ -1844,10 +1844,10 @@ that meets this requirement.
|
||||
|
||||
Furthermore, NMI handlers can be interrupted by what appear to RCU to be
|
||||
normal interrupts. One way that this can happen is for code that
|
||||
directly invokes rcu_irq_enter() and rcu_irq_exit() to be called
|
||||
directly invokes ct_irq_enter() and ct_irq_exit() to be called
|
||||
from an NMI handler. This astonishing fact of life prompted the current
|
||||
code structure, which has rcu_irq_enter() invoking
|
||||
rcu_nmi_enter() and rcu_irq_exit() invoking rcu_nmi_exit().
|
||||
code structure, which has ct_irq_enter() invoking
|
||||
ct_nmi_enter() and ct_irq_exit() invoking ct_nmi_exit().
|
||||
And yes, I also learned of this requirement the hard way.
|
||||
|
||||
Loadable Modules
|
||||
@ -2195,7 +2195,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
|
||||
sections, and RCU believes this CPU to be idle, no problem. This
|
||||
sort of thing is used by some architectures for light-weight
|
||||
exception handlers, which can then avoid the overhead of
|
||||
rcu_irq_enter() and rcu_irq_exit() at exception entry and
|
||||
ct_irq_enter() and ct_irq_exit() at exception entry and
|
||||
exit, respectively. Some go further and avoid the entireties of
|
||||
irq_enter() and irq_exit().
|
||||
Just make very sure you are running some of your tests with
|
||||
@ -2226,7 +2226,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Answer**: |
|
||||
+-----------------------------------------------------------------------+
|
||||
| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so |
|
||||
| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so |
|
||||
| often. But given that long-running interrupt handlers can cause other |
|
||||
| problems, not least for response time, shouldn't you work to keep |
|
||||
| your interrupt handler's runtime within reasonable bounds? |
|
||||
|
@ -97,12 +97,12 @@ warnings:
|
||||
which will include additional debugging information.
|
||||
|
||||
- A low-level kernel issue that either fails to invoke one of the
|
||||
variants of rcu_user_enter(), rcu_user_exit(), rcu_idle_enter(),
|
||||
rcu_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one
|
||||
variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(),
|
||||
ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one
|
||||
hand, or that invokes one of them too many times on the other.
|
||||
Historically, the most frequent issue has been an omission
|
||||
of either irq_enter() or irq_exit(), which in turn invoke
|
||||
rcu_irq_enter() or rcu_irq_exit(), respectively. Building your
|
||||
ct_irq_enter() or ct_irq_exit(), respectively. Building your
|
||||
kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types
|
||||
of issues, which sometimes arise in architecture-specific code.
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#
|
||||
# Feature name: context-tracking
|
||||
# Kconfig: HAVE_CONTEXT_TRACKING
|
||||
# description: arch supports context tracking for NO_HZ_FULL
|
||||
# Feature name: user-context-tracking
|
||||
# Kconfig: HAVE_CONTEXT_TRACKING_USER
|
||||
# description: arch supports user context tracking for NO_HZ_FULL
|
||||
#
|
||||
-----------------------
|
||||
| arch |status|
|
||||
|
@ -5039,6 +5039,7 @@ F: include/linux/console*
|
||||
|
||||
CONTEXT TRACKING
|
||||
M: Frederic Weisbecker <frederic@kernel.org>
|
||||
M: "Paul E. McKenney" <paulmck@kernel.org>
|
||||
S: Maintained
|
||||
F: kernel/context_tracking.c
|
||||
F: include/linux/context_tracking*
|
||||
|
@ -774,7 +774,7 @@ config HAVE_ARCH_WITHIN_STACK_FRAMES
|
||||
and similar) by implementing an inline arch_within_stack_frames(),
|
||||
which is used by CONFIG_HARDENED_USERCOPY.
|
||||
|
||||
config HAVE_CONTEXT_TRACKING
|
||||
config HAVE_CONTEXT_TRACKING_USER
|
||||
bool
|
||||
help
|
||||
Provide kernel/user boundary probes necessary for subsystems
|
||||
@ -782,10 +782,10 @@ config HAVE_CONTEXT_TRACKING
|
||||
Syscalls need to be wrapped inside user_exit()-user_enter(), either
|
||||
optimized behind static key or through the slow path using TIF_NOHZ
|
||||
flag. Exception handlers must be wrapped as well. Irqs are already
|
||||
protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal
|
||||
protected inside ct_irq_enter/ct_irq_exit() but preemption or signal
|
||||
handling on irq exit still need to be protected.
|
||||
|
||||
config HAVE_CONTEXT_TRACKING_OFFSTACK
|
||||
config HAVE_CONTEXT_TRACKING_USER_OFFSTACK
|
||||
bool
|
||||
help
|
||||
Architecture neither relies on exception_enter()/exception_exit()
|
||||
@ -797,7 +797,7 @@ config HAVE_CONTEXT_TRACKING_OFFSTACK
|
||||
|
||||
- Critical entry code isn't preemptible (or better yet:
|
||||
not interruptible).
|
||||
- No use of RCU read side critical sections, unless rcu_nmi_enter()
|
||||
- No use of RCU read side critical sections, unless ct_nmi_enter()
|
||||
got called.
|
||||
- No use of instrumentation, unless instrumentation_begin() got
|
||||
called.
|
||||
|
@ -84,7 +84,7 @@ config ARM
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE
|
||||
select HAVE_ARM_SMCCC if CPU_V7
|
||||
select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
select HAVE_BUILDTIME_MCOUNT_SORT
|
||||
select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include "entry-header.S"
|
||||
|
||||
saved_psr .req r8
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER)
|
||||
saved_pc .req r9
|
||||
#define TRACE(x...) x
|
||||
#else
|
||||
@ -38,7 +38,7 @@ saved_pc .req lr
|
||||
|
||||
.section .entry.text,"ax",%progbits
|
||||
.align 5
|
||||
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \
|
||||
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING_USER) || \
|
||||
IS_ENABLED(CONFIG_DEBUG_RSEQ))
|
||||
/*
|
||||
* This is the fast syscall return path. We do as little as possible here,
|
||||
|
@ -366,25 +366,25 @@ ALT_UP_B(.L1_\@)
|
||||
* between user and kernel mode.
|
||||
*/
|
||||
.macro ct_user_exit, save = 1
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
.if \save
|
||||
stmdb sp!, {r0-r3, ip, lr}
|
||||
bl context_tracking_user_exit
|
||||
bl user_exit_callable
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
.else
|
||||
bl context_tracking_user_exit
|
||||
bl user_exit_callable
|
||||
.endif
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro ct_user_enter, save = 1
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
.if \save
|
||||
stmdb sp!, {r0-r3, ip, lr}
|
||||
bl context_tracking_user_enter
|
||||
bl user_enter_callable
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
.else
|
||||
bl context_tracking_user_enter
|
||||
bl user_enter_callable
|
||||
.endif
|
||||
#endif
|
||||
.endm
|
||||
|
@ -3,6 +3,7 @@
|
||||
* Copyright (C) 2012 Freescale Semiconductor, Inc.
|
||||
*/
|
||||
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/cpuidle.h>
|
||||
@ -24,9 +25,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev,
|
||||
imx6_set_lpm(WAIT_UNCLOCKED);
|
||||
raw_spin_unlock(&cpuidle_lock);
|
||||
|
||||
rcu_idle_enter();
|
||||
ct_idle_enter();
|
||||
cpu_do_idle();
|
||||
rcu_idle_exit();
|
||||
ct_idle_exit();
|
||||
|
||||
raw_spin_lock(&cpuidle_lock);
|
||||
if (num_idle_cpus-- == num_online_cpus())
|
||||
|
@ -174,7 +174,7 @@ config ARM64
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
select HAVE_CMPXCHG_DOUBLE
|
||||
select HAVE_CMPXCHG_LOCAL
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
|
@ -41,7 +41,7 @@ static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
|
||||
|
||||
if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
rcu_irq_enter();
|
||||
ct_irq_enter();
|
||||
trace_hardirqs_off_finish();
|
||||
|
||||
regs->exit_rcu = true;
|
||||
@ -76,7 +76,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
|
||||
if (regs->exit_rcu) {
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare();
|
||||
rcu_irq_exit();
|
||||
ct_irq_exit();
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
return;
|
||||
}
|
||||
@ -84,7 +84,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
|
||||
trace_hardirqs_on();
|
||||
} else {
|
||||
if (regs->exit_rcu)
|
||||
rcu_irq_exit();
|
||||
ct_irq_exit();
|
||||
}
|
||||
}
|
||||
|
||||
@ -161,7 +161,7 @@ static void noinstr arm64_enter_nmi(struct pt_regs *regs)
|
||||
__nmi_enter();
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
lockdep_hardirq_enter();
|
||||
rcu_nmi_enter();
|
||||
ct_nmi_enter();
|
||||
|
||||
trace_hardirqs_off_finish();
|
||||
ftrace_nmi_enter();
|
||||
@ -182,7 +182,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
|
||||
lockdep_hardirqs_on_prepare();
|
||||
}
|
||||
|
||||
rcu_nmi_exit();
|
||||
ct_nmi_exit();
|
||||
lockdep_hardirq_exit();
|
||||
if (restore)
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
@ -199,7 +199,7 @@ static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
|
||||
regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
|
||||
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
rcu_nmi_enter();
|
||||
ct_nmi_enter();
|
||||
|
||||
trace_hardirqs_off_finish();
|
||||
}
|
||||
@ -218,7 +218,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
|
||||
lockdep_hardirqs_on_prepare();
|
||||
}
|
||||
|
||||
rcu_nmi_exit();
|
||||
ct_nmi_exit();
|
||||
if (restore)
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ config CSKY
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_MMAP_RND_BITS
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_VIRT_CPU_ACCOUNTING_GEN
|
||||
select HAVE_DEBUG_BUGVERBOSE
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
|
@ -19,11 +19,11 @@
|
||||
.endm
|
||||
|
||||
.macro context_tracking
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
mfcr a0, epsr
|
||||
btsti a0, 31
|
||||
bt 1f
|
||||
jbsr context_tracking_user_exit
|
||||
jbsr user_exit_callable
|
||||
ldw a0, (sp, LSAVE_A0)
|
||||
ldw a1, (sp, LSAVE_A1)
|
||||
ldw a2, (sp, LSAVE_A2)
|
||||
@ -159,8 +159,8 @@ ret_from_exception:
|
||||
and r10, r9
|
||||
cmpnei r10, 0
|
||||
bt exit_work
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
jbsr context_tracking_user_enter
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
jbsr user_enter_callable
|
||||
#endif
|
||||
1:
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
|
@ -76,7 +76,7 @@ config LOONGARCH
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
select HAVE_ASM_MODVERSIONS
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_COPY_THREAD_TLS
|
||||
select HAVE_DEBUG_STACKOVERFLOW
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
|
@ -56,7 +56,7 @@ config MIPS
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
|
||||
select HAVE_ASM_MODVERSIONS
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_TIF_NOHZ
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
|
@ -202,7 +202,7 @@ config PPC
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ASM_MODVERSIONS
|
||||
select HAVE_CONTEXT_TRACKING if PPC64
|
||||
select HAVE_CONTEXT_TRACKING_USER if PPC64
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select HAVE_DEBUG_STACKOVERFLOW
|
||||
|
@ -2,7 +2,7 @@
|
||||
#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H
|
||||
#define _ASM_POWERPC_CONTEXT_TRACKING_H
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
#define SCHEDULE_USER bl schedule_user
|
||||
#else
|
||||
#define SCHEDULE_USER bl schedule
|
||||
|
@ -86,7 +86,7 @@ config RISCV
|
||||
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
|
||||
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
|
||||
select HAVE_ASM_MODVERSIONS
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select HAVE_DMA_CONTIGUOUS if MMU
|
||||
select HAVE_EBPF_JIT if MMU
|
||||
|
@ -111,12 +111,12 @@ _save_context:
|
||||
call __trace_hardirqs_off
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
/* If previous state is in user mode, call context_tracking_user_exit. */
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
/* If previous state is in user mode, call user_exit_callable(). */
|
||||
li a0, SR_PP
|
||||
and a0, s1, a0
|
||||
bnez a0, skip_context_tracking
|
||||
call context_tracking_user_exit
|
||||
call user_exit_callable
|
||||
skip_context_tracking:
|
||||
#endif
|
||||
|
||||
@ -176,7 +176,7 @@ handle_syscall:
|
||||
*/
|
||||
csrs CSR_STATUS, SR_IE
|
||||
#endif
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER)
|
||||
/* Recover a0 - a7 for system calls */
|
||||
REG_L a0, PT_A0(sp)
|
||||
REG_L a1, PT_A1(sp)
|
||||
@ -269,8 +269,8 @@ resume_userspace:
|
||||
andi s1, s0, _TIF_WORK_MASK
|
||||
bnez s1, work_pending
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
call context_tracking_user_enter
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
call user_enter_callable
|
||||
#endif
|
||||
|
||||
/* Save unwound kernel stack pointer in thread_info */
|
||||
|
@ -71,7 +71,7 @@ config SPARC64
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_TIF_NOHZ
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select IOMMU_HELPER
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <asm/visasm.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
# define SCHEDULE_USER schedule_user
|
||||
#else
|
||||
# define SCHEDULE_USER schedule
|
||||
|
@ -186,8 +186,8 @@ config X86
|
||||
select HAVE_ASM_MODVERSIONS
|
||||
select HAVE_CMPXCHG_DOUBLE
|
||||
select HAVE_CMPXCHG_LOCAL
|
||||
select HAVE_CONTEXT_TRACKING if X86_64
|
||||
select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER if X86_64
|
||||
select HAVE_CONTEXT_TRACKING_USER_OFFSTACK if HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL
|
||||
select HAVE_BUILDTIME_MCOUNT_SORT
|
||||
|
@ -1526,7 +1526,7 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
|
||||
|
||||
/*
|
||||
* Entry handling for valid #PF from kernel mode is slightly
|
||||
* different: RCU is already watching and rcu_irq_enter() must not
|
||||
* different: RCU is already watching and ct_irq_enter() must not
|
||||
* be invoked because a kernel fault on a user space address might
|
||||
* sleep.
|
||||
*
|
||||
|
@ -33,7 +33,7 @@ config XTENSA
|
||||
select HAVE_ARCH_KCSAN
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_CONTEXT_TRACKING_USER
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_EXIT_THREAD
|
||||
|
@ -455,10 +455,10 @@ KABI_W or a3, a3, a2
|
||||
abi_call trace_hardirqs_off
|
||||
1:
|
||||
#endif
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
l32i abi_tmp0, a1, PT_PS
|
||||
bbci.l abi_tmp0, PS_UM_BIT, 1f
|
||||
abi_call context_tracking_user_exit
|
||||
abi_call user_exit_callable
|
||||
1:
|
||||
#endif
|
||||
|
||||
@ -544,8 +544,8 @@ common_exception_return:
|
||||
j .Lrestore_state
|
||||
|
||||
.Lexit_tif_loop_user:
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
abi_call context_tracking_user_enter
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
abi_call user_enter_callable
|
||||
#endif
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
_bbci.l abi_saved0, TIF_DB_DISABLED, 1f
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <acpi/processor.h>
|
||||
#include <linux/context_tracking.h>
|
||||
|
||||
/*
|
||||
* Include the apic definitions for x86 to have the APIC timer related defines
|
||||
@ -647,11 +648,11 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv,
|
||||
raw_spin_unlock(&c3_lock);
|
||||
}
|
||||
|
||||
rcu_idle_enter();
|
||||
ct_idle_enter();
|
||||
|
||||
acpi_idle_do_entry(cx);
|
||||
|
||||
rcu_idle_exit();
|
||||
ct_idle_exit();
|
||||
|
||||
/* Re-enable bus master arbitration */
|
||||
if (dis_bm) {
|
||||
|
@ -69,12 +69,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
|
||||
return -1;
|
||||
|
||||
/* Do runtime PM to manage a hierarchical CPU topology. */
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
if (s2idle)
|
||||
dev_pm_genpd_suspend(pd_dev);
|
||||
else
|
||||
pm_runtime_put_sync_suspend(pd_dev);
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
|
||||
state = psci_get_domain_state();
|
||||
if (!state)
|
||||
@ -82,12 +82,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
|
||||
|
||||
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
|
||||
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
if (s2idle)
|
||||
dev_pm_genpd_resume(pd_dev);
|
||||
else
|
||||
pm_runtime_get_sync(pd_dev);
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
|
||||
cpu_pm_exit();
|
||||
|
||||
|
@ -116,12 +116,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
|
||||
return -1;
|
||||
|
||||
/* Do runtime PM to manage a hierarchical CPU topology. */
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
if (s2idle)
|
||||
dev_pm_genpd_suspend(pd_dev);
|
||||
else
|
||||
pm_runtime_put_sync_suspend(pd_dev);
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
|
||||
if (sbi_is_domain_state_available())
|
||||
state = sbi_get_domain_state();
|
||||
@ -130,12 +130,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
|
||||
|
||||
ret = sbi_suspend(state) ? -1 : idx;
|
||||
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
if (s2idle)
|
||||
dev_pm_genpd_resume(pd_dev);
|
||||
else
|
||||
pm_runtime_get_sync(pd_dev);
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
|
||||
cpu_pm_exit();
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <trace/events/power.h>
|
||||
|
||||
#include "cpuidle.h"
|
||||
@ -150,12 +151,12 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
|
||||
*/
|
||||
stop_critical_timings();
|
||||
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
|
||||
rcu_idle_enter();
|
||||
ct_idle_enter();
|
||||
target_state->enter_s2idle(dev, drv, index);
|
||||
if (WARN_ON_ONCE(!irqs_disabled()))
|
||||
local_irq_disable();
|
||||
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
|
||||
rcu_idle_exit();
|
||||
ct_idle_exit();
|
||||
tick_unfreeze();
|
||||
start_critical_timings();
|
||||
|
||||
@ -233,10 +234,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
|
||||
|
||||
stop_critical_timings();
|
||||
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
|
||||
rcu_idle_enter();
|
||||
ct_idle_enter();
|
||||
entered_state = target_state->enter(dev, drv, index);
|
||||
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
|
||||
rcu_idle_exit();
|
||||
ct_idle_exit();
|
||||
start_critical_timings();
|
||||
|
||||
sched_clock_idle_wakeup_event();
|
||||
|
@ -10,71 +10,72 @@
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
extern void context_tracking_cpu_set(int cpu);
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
extern void ct_cpu_track_user(int cpu);
|
||||
|
||||
/* Called with interrupts disabled. */
|
||||
extern void __context_tracking_enter(enum ctx_state state);
|
||||
extern void __context_tracking_exit(enum ctx_state state);
|
||||
extern void __ct_user_enter(enum ctx_state state);
|
||||
extern void __ct_user_exit(enum ctx_state state);
|
||||
|
||||
extern void context_tracking_enter(enum ctx_state state);
|
||||
extern void context_tracking_exit(enum ctx_state state);
|
||||
extern void context_tracking_user_enter(void);
|
||||
extern void context_tracking_user_exit(void);
|
||||
extern void ct_user_enter(enum ctx_state state);
|
||||
extern void ct_user_exit(enum ctx_state state);
|
||||
|
||||
extern void user_enter_callable(void);
|
||||
extern void user_exit_callable(void);
|
||||
|
||||
static inline void user_enter(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
context_tracking_enter(CONTEXT_USER);
|
||||
ct_user_enter(CONTEXT_USER);
|
||||
|
||||
}
|
||||
static inline void user_exit(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
context_tracking_exit(CONTEXT_USER);
|
||||
ct_user_exit(CONTEXT_USER);
|
||||
}
|
||||
|
||||
/* Called with interrupts disabled. */
|
||||
static __always_inline void user_enter_irqoff(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
__context_tracking_enter(CONTEXT_USER);
|
||||
__ct_user_enter(CONTEXT_USER);
|
||||
|
||||
}
|
||||
static __always_inline void user_exit_irqoff(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
__context_tracking_exit(CONTEXT_USER);
|
||||
__ct_user_exit(CONTEXT_USER);
|
||||
}
|
||||
|
||||
static inline enum ctx_state exception_enter(void)
|
||||
{
|
||||
enum ctx_state prev_ctx;
|
||||
|
||||
if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) ||
|
||||
if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) ||
|
||||
!context_tracking_enabled())
|
||||
return 0;
|
||||
|
||||
prev_ctx = this_cpu_read(context_tracking.state);
|
||||
prev_ctx = __ct_state();
|
||||
if (prev_ctx != CONTEXT_KERNEL)
|
||||
context_tracking_exit(prev_ctx);
|
||||
ct_user_exit(prev_ctx);
|
||||
|
||||
return prev_ctx;
|
||||
}
|
||||
|
||||
static inline void exception_exit(enum ctx_state prev_ctx)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) &&
|
||||
if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) &&
|
||||
context_tracking_enabled()) {
|
||||
if (prev_ctx != CONTEXT_KERNEL)
|
||||
context_tracking_enter(prev_ctx);
|
||||
ct_user_enter(prev_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline bool context_tracking_guest_enter(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
__context_tracking_enter(CONTEXT_GUEST);
|
||||
__ct_user_enter(CONTEXT_GUEST);
|
||||
|
||||
return context_tracking_enabled_this_cpu();
|
||||
}
|
||||
@ -82,40 +83,56 @@ static __always_inline bool context_tracking_guest_enter(void)
|
||||
static __always_inline void context_tracking_guest_exit(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
__context_tracking_exit(CONTEXT_GUEST);
|
||||
__ct_user_exit(CONTEXT_GUEST);
|
||||
}
|
||||
|
||||
/**
|
||||
* ct_state() - return the current context tracking state if known
|
||||
*
|
||||
* Returns the current cpu's context tracking state if context tracking
|
||||
* is enabled. If context tracking is disabled, returns
|
||||
* CONTEXT_DISABLED. This should be used primarily for debugging.
|
||||
*/
|
||||
static __always_inline enum ctx_state ct_state(void)
|
||||
{
|
||||
return context_tracking_enabled() ?
|
||||
this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
|
||||
}
|
||||
#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
|
||||
|
||||
#else
|
||||
static inline void user_enter(void) { }
|
||||
static inline void user_exit(void) { }
|
||||
static inline void user_enter_irqoff(void) { }
|
||||
static inline void user_exit_irqoff(void) { }
|
||||
static inline enum ctx_state exception_enter(void) { return 0; }
|
||||
static inline int exception_enter(void) { return 0; }
|
||||
static inline void exception_exit(enum ctx_state prev_ctx) { }
|
||||
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
|
||||
static inline int ct_state(void) { return -1; }
|
||||
static __always_inline bool context_tracking_guest_enter(void) { return false; }
|
||||
static inline void context_tracking_guest_exit(void) { }
|
||||
#define CT_WARN_ON(cond) do { } while (0)
|
||||
#endif /* !CONFIG_CONTEXT_TRACKING_USER */
|
||||
|
||||
#endif /* !CONFIG_CONTEXT_TRACKING */
|
||||
|
||||
#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
|
||||
extern void context_tracking_init(void);
|
||||
#else
|
||||
static inline void context_tracking_init(void) { }
|
||||
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
|
||||
#endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
extern void ct_idle_enter(void);
|
||||
extern void ct_idle_exit(void);
|
||||
|
||||
/*
|
||||
* Is the current CPU in an extended quiescent state?
|
||||
*
|
||||
* No ordering, as we are sampling CPU-local information.
|
||||
*/
|
||||
static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
|
||||
{
|
||||
return !(arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the current CPU's context_tracking structure's ->state field
|
||||
* with ordering. Return the new value.
|
||||
*/
|
||||
static __always_inline unsigned long ct_state_inc(int incby)
|
||||
{
|
||||
return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void ct_idle_enter(void) { }
|
||||
static inline void ct_idle_exit(void) { }
|
||||
#endif /* !CONFIG_CONTEXT_TRACKING_IDLE */
|
||||
|
||||
#endif
|
||||
|
21
include/linux/context_tracking_irq.h
Normal file
21
include/linux/context_tracking_irq.h
Normal file
@ -0,0 +1,21 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H
|
||||
#define _LINUX_CONTEXT_TRACKING_IRQ_H
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
void ct_irq_enter(void);
|
||||
void ct_irq_exit(void);
|
||||
void ct_irq_enter_irqson(void);
|
||||
void ct_irq_exit_irqson(void);
|
||||
void ct_nmi_enter(void);
|
||||
void ct_nmi_exit(void);
|
||||
#else
|
||||
static inline void ct_irq_enter(void) { }
|
||||
static inline void ct_irq_exit(void) { }
|
||||
static inline void ct_irq_enter_irqson(void) { }
|
||||
static inline void ct_irq_exit_irqson(void) { }
|
||||
static inline void ct_nmi_enter(void) { }
|
||||
static inline void ct_nmi_exit(void) { }
|
||||
#endif
|
||||
|
||||
#endif
|
@ -4,8 +4,28 @@
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/context_tracking_irq.h>
|
||||
|
||||
/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
|
||||
#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)
|
||||
|
||||
enum ctx_state {
|
||||
CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */
|
||||
CONTEXT_KERNEL = 0,
|
||||
CONTEXT_IDLE = 1,
|
||||
CONTEXT_USER = 2,
|
||||
CONTEXT_GUEST = 3,
|
||||
CONTEXT_MAX = 4,
|
||||
};
|
||||
|
||||
/* Even value for idle, else odd. */
|
||||
#define RCU_DYNTICKS_IDX CONTEXT_MAX
|
||||
|
||||
#define CT_STATE_MASK (CONTEXT_MAX - 1)
|
||||
#define CT_DYNTICKS_MASK (~CT_STATE_MASK)
|
||||
|
||||
struct context_tracking {
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
/*
|
||||
* When active is false, probes are unset in order
|
||||
* to minimize overhead: TIF flags are cleared
|
||||
@ -14,18 +34,73 @@ struct context_tracking {
|
||||
*/
|
||||
bool active;
|
||||
int recursion;
|
||||
enum ctx_state {
|
||||
CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */
|
||||
CONTEXT_KERNEL = 0,
|
||||
CONTEXT_USER,
|
||||
CONTEXT_GUEST,
|
||||
} state;
|
||||
#endif
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
atomic_t state;
|
||||
#endif
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
long dynticks_nesting; /* Track process nesting level. */
|
||||
long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
extern struct static_key_false context_tracking_key;
|
||||
DECLARE_PER_CPU(struct context_tracking, context_tracking);
|
||||
|
||||
static __always_inline int __ct_state(void)
|
||||
{
|
||||
return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
static __always_inline int ct_dynticks(void)
|
||||
{
|
||||
return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK;
|
||||
}
|
||||
|
||||
static __always_inline int ct_dynticks_cpu(int cpu)
|
||||
{
|
||||
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
|
||||
|
||||
return atomic_read(&ct->state) & CT_DYNTICKS_MASK;
|
||||
}
|
||||
|
||||
static __always_inline int ct_dynticks_cpu_acquire(int cpu)
|
||||
{
|
||||
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
|
||||
|
||||
return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK;
|
||||
}
|
||||
|
||||
static __always_inline long ct_dynticks_nesting(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.dynticks_nesting);
|
||||
}
|
||||
|
||||
static __always_inline long ct_dynticks_nesting_cpu(int cpu)
|
||||
{
|
||||
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
|
||||
|
||||
return ct->dynticks_nesting;
|
||||
}
|
||||
|
||||
static __always_inline long ct_dynticks_nmi_nesting(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.dynticks_nmi_nesting);
|
||||
}
|
||||
|
||||
static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu)
|
||||
{
|
||||
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
|
||||
|
||||
return ct->dynticks_nmi_nesting;
|
||||
}
|
||||
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
extern struct static_key_false context_tracking_key;
|
||||
|
||||
static __always_inline bool context_tracking_enabled(void)
|
||||
{
|
||||
return static_branch_unlikely(&context_tracking_key);
|
||||
@ -41,15 +116,31 @@ static inline bool context_tracking_enabled_this_cpu(void)
|
||||
return context_tracking_enabled() && __this_cpu_read(context_tracking.active);
|
||||
}
|
||||
|
||||
static __always_inline bool context_tracking_in_user(void)
|
||||
/**
|
||||
* ct_state() - return the current context tracking state if known
|
||||
*
|
||||
* Returns the current cpu's context tracking state if context tracking
|
||||
* is enabled. If context tracking is disabled, returns
|
||||
* CONTEXT_DISABLED. This should be used primarily for debugging.
|
||||
*/
|
||||
static __always_inline int ct_state(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.state) == CONTEXT_USER;
|
||||
int ret;
|
||||
|
||||
if (!context_tracking_enabled())
|
||||
return CONTEXT_DISABLED;
|
||||
|
||||
preempt_disable();
|
||||
ret = __ct_state();
|
||||
preempt_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
static __always_inline bool context_tracking_in_user(void) { return false; }
|
||||
static __always_inline bool context_tracking_enabled(void) { return false; }
|
||||
static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; }
|
||||
static __always_inline bool context_tracking_enabled_this_cpu(void) { return false; }
|
||||
#endif /* CONFIG_CONTEXT_TRACKING */
|
||||
#endif /* CONFIG_CONTEXT_TRACKING_USER */
|
||||
|
||||
#endif
|
||||
|
@ -357,7 +357,7 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs);
|
||||
/**
|
||||
* struct irqentry_state - Opaque object for exception state storage
|
||||
* @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
|
||||
* exit path has to invoke rcu_irq_exit().
|
||||
* exit path has to invoke ct_irq_exit().
|
||||
* @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
|
||||
* lockdep state is restored correctly on exit from nmi.
|
||||
*
|
||||
@ -395,12 +395,12 @@ typedef struct irqentry_state {
|
||||
*
|
||||
* For kernel mode entries RCU handling is done conditional. If RCU is
|
||||
* watching then the only RCU requirement is to check whether the tick has
|
||||
* to be restarted. If RCU is not watching then rcu_irq_enter() has to be
|
||||
* invoked on entry and rcu_irq_exit() on exit.
|
||||
* to be restarted. If RCU is not watching then ct_irq_enter() has to be
|
||||
* invoked on entry and ct_irq_exit() on exit.
|
||||
*
|
||||
* Avoiding the rcu_irq_enter/exit() calls is an optimization but also
|
||||
* Avoiding the ct_irq_enter/exit() calls is an optimization but also
|
||||
* solves the problem of kernel mode pagefaults which can schedule, which
|
||||
* is not possible after invoking rcu_irq_enter() without undoing it.
|
||||
* is not possible after invoking ct_irq_enter() without undoing it.
|
||||
*
|
||||
* For user mode entries irqentry_enter_from_user_mode() is invoked to
|
||||
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
|
||||
|
@ -92,14 +92,6 @@ void irq_exit_rcu(void);
|
||||
#define arch_nmi_exit() do { } while (0)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
static inline void rcu_nmi_enter(void) { }
|
||||
static inline void rcu_nmi_exit(void) { }
|
||||
#else
|
||||
extern void rcu_nmi_enter(void);
|
||||
extern void rcu_nmi_exit(void);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* NMI vs Tracing
|
||||
* --------------
|
||||
@ -124,7 +116,7 @@ extern void rcu_nmi_exit(void);
|
||||
do { \
|
||||
__nmi_enter(); \
|
||||
lockdep_hardirq_enter(); \
|
||||
rcu_nmi_enter(); \
|
||||
ct_nmi_enter(); \
|
||||
instrumentation_begin(); \
|
||||
ftrace_nmi_enter(); \
|
||||
instrumentation_end(); \
|
||||
@ -143,7 +135,7 @@ extern void rcu_nmi_exit(void);
|
||||
instrumentation_begin(); \
|
||||
ftrace_nmi_exit(); \
|
||||
instrumentation_end(); \
|
||||
rcu_nmi_exit(); \
|
||||
ct_nmi_exit(); \
|
||||
lockdep_hardirq_exit(); \
|
||||
__nmi_exit(); \
|
||||
} while (0)
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <linux/lockdep.h>
|
||||
#include <asm/processor.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/context_tracking_irq.h>
|
||||
|
||||
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
|
||||
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
|
||||
@ -104,13 +105,11 @@ static inline void rcu_sysrq_start(void) { }
|
||||
static inline void rcu_sysrq_end(void) { }
|
||||
#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
void rcu_user_enter(void);
|
||||
void rcu_user_exit(void);
|
||||
#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
|
||||
void rcu_irq_work_resched(void);
|
||||
#else
|
||||
static inline void rcu_user_enter(void) { }
|
||||
static inline void rcu_user_exit(void) { }
|
||||
#endif /* CONFIG_NO_HZ_FULL */
|
||||
static inline void rcu_irq_work_resched(void) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
void rcu_init_nohz(void);
|
||||
@ -129,7 +128,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { }
|
||||
* @a: Code that RCU needs to pay attention to.
|
||||
*
|
||||
* RCU read-side critical sections are forbidden in the inner idle loop,
|
||||
* that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU
|
||||
* that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU
|
||||
* will happily ignore any such read-side critical sections. However,
|
||||
* things like powertop need tracepoints in the inner idle loop.
|
||||
*
|
||||
@ -144,9 +143,9 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { }
|
||||
*/
|
||||
#define RCU_NONIDLE(a) \
|
||||
do { \
|
||||
rcu_irq_enter_irqson(); \
|
||||
ct_irq_enter_irqson(); \
|
||||
do { a; } while (0); \
|
||||
rcu_irq_exit_irqson(); \
|
||||
ct_irq_exit_irqson(); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
|
@ -95,12 +95,6 @@ static inline int rcu_needs_cpu(void)
|
||||
static inline void rcu_virt_note_context_switch(int cpu) { }
|
||||
static inline void rcu_cpu_stall_reset(void) { }
|
||||
static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
|
||||
static inline void rcu_idle_enter(void) { }
|
||||
static inline void rcu_idle_exit(void) { }
|
||||
static inline void rcu_irq_enter(void) { }
|
||||
static inline void rcu_irq_exit_irqson(void) { }
|
||||
static inline void rcu_irq_enter_irqson(void) { }
|
||||
static inline void rcu_irq_exit(void) { }
|
||||
static inline void rcu_irq_exit_check_preempt(void) { }
|
||||
#define rcu_is_idle_cpu(cpu) \
|
||||
(is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
|
||||
|
@ -47,12 +47,6 @@ unsigned long start_poll_synchronize_rcu(void);
|
||||
bool poll_state_synchronize_rcu(unsigned long oldstate);
|
||||
void cond_synchronize_rcu(unsigned long oldstate);
|
||||
|
||||
void rcu_idle_enter(void);
|
||||
void rcu_idle_exit(void);
|
||||
void rcu_irq_enter(void);
|
||||
void rcu_irq_exit(void);
|
||||
void rcu_irq_enter_irqson(void);
|
||||
void rcu_irq_exit_irqson(void);
|
||||
bool rcu_is_idle_cpu(int cpu);
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
@ -61,6 +55,9 @@ void rcu_irq_exit_check_preempt(void);
|
||||
static inline void rcu_irq_exit_check_preempt(void) { }
|
||||
#endif
|
||||
|
||||
struct task_struct;
|
||||
void rcu_preempt_deferred_qs(struct task_struct *t);
|
||||
|
||||
void exit_rcu(void);
|
||||
|
||||
void rcu_scheduler_starting(void);
|
||||
|
@ -200,13 +200,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
|
||||
*/ \
|
||||
if (rcuidle) { \
|
||||
__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
|
||||
rcu_irq_enter_irqson(); \
|
||||
ct_irq_enter_irqson(); \
|
||||
} \
|
||||
\
|
||||
__DO_TRACE_CALL(name, TP_ARGS(args)); \
|
||||
\
|
||||
if (rcuidle) { \
|
||||
rcu_irq_exit_irqson(); \
|
||||
ct_irq_exit_irqson(); \
|
||||
srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
|
||||
} \
|
||||
\
|
||||
|
@ -494,11 +494,11 @@ config VIRT_CPU_ACCOUNTING_NATIVE
|
||||
|
||||
config VIRT_CPU_ACCOUNTING_GEN
|
||||
bool "Full dynticks CPU time accounting"
|
||||
depends on HAVE_CONTEXT_TRACKING
|
||||
depends on HAVE_CONTEXT_TRACKING_USER
|
||||
depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
|
||||
depends on GENERIC_CLOCKEVENTS
|
||||
select VIRT_CPU_ACCOUNTING
|
||||
select CONTEXT_TRACKING
|
||||
select CONTEXT_TRACKING_USER
|
||||
help
|
||||
Select this option to enable task and CPU time accounting on full
|
||||
dynticks systems. This accounting is implemented by watching every
|
||||
|
@ -295,7 +295,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
|
||||
rcu_idle = !rcu_is_watching();
|
||||
if (rcu_idle) {
|
||||
local_irq_save(flags);
|
||||
rcu_irq_enter();
|
||||
ct_irq_enter();
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
|
||||
@ -304,7 +304,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
|
||||
fn = find_module_check_fn(ptr);
|
||||
|
||||
if (rcu_idle) {
|
||||
rcu_irq_exit();
|
||||
ct_irq_exit();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
@ -1,18 +1,20 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Context tracking: Probe on high level context boundaries such as kernel
|
||||
* and userspace. This includes syscalls and exceptions entry/exit.
|
||||
* Context tracking: Probe on high level context boundaries such as kernel,
|
||||
* userspace, guest or idle.
|
||||
*
|
||||
* This is used by RCU to remove its dependency on the timer tick while a CPU
|
||||
* runs in userspace.
|
||||
* runs in idle, userspace or guest mode.
|
||||
*
|
||||
* Started by Frederic Weisbecker:
|
||||
* User/guest tracking started by Frederic Weisbecker:
|
||||
*
|
||||
* Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
|
||||
* Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker
|
||||
*
|
||||
* Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
|
||||
* Steven Rostedt, Peter Zijlstra for suggestions and improvements.
|
||||
*
|
||||
* RCU extended quiescent state bits imported from kernel/rcu/tree.c
|
||||
* where the relevant authorship may be found.
|
||||
*/
|
||||
|
||||
#include <linux/context_tracking.h>
|
||||
@ -21,6 +23,411 @@
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <trace/events/rcu.h>
|
||||
|
||||
|
||||
/*
 * Per-CPU context-tracking state.  The initializers describe a CPU that
 * RCU is watching: the two nesting counters start at the values that
 * ct_kernel_enter() writes (1 and DYNTICK_IRQ_NONIDLE), and ->state starts
 * with RCU_DYNTICKS_IDX set, which is the bit ct_kernel_enter_state()
 * expects to find set once RCU is watching.
 */
DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
.dynticks_nesting = 1,
|
||||
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
|
||||
#endif
|
||||
.state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(context_tracking);
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
#define TPS(x) tracepoint_string(x)
|
||||
|
||||
/*
 * Record the current task on dyntick-idle entry: store this CPU's number
 * in current->rcu_tasks_idle_cpu.  The body is compiled out unless both
 * CONFIG_TASKS_RCU and CONFIG_NO_HZ_FULL are defined.
 */
|
||||
static __always_inline void rcu_dynticks_task_enter(void)
|
||||
{
|
||||
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
|
||||
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
|
||||
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
|
||||
}
|
||||
|
||||
/*
 * Record no current task on dyntick-idle exit: reset
 * current->rcu_tasks_idle_cpu to -1.  The body is compiled out unless both
 * CONFIG_TASKS_RCU and CONFIG_NO_HZ_FULL are defined.
 */
|
||||
static __always_inline void rcu_dynticks_task_exit(void)
|
||||
{
|
||||
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
|
||||
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
|
||||
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
|
||||
}
|
||||
|
||||
/*
 * Turn on heavyweight RCU tasks trace readers on idle/user entry by
 * setting current->trc_reader_special.b.need_mb.  Only done when
 * CONFIG_TASKS_TRACE_RCU_READ_MB is enabled; the whole body is compiled
 * out without CONFIG_TASKS_TRACE_RCU.
 */
|
||||
static __always_inline void rcu_dynticks_task_trace_enter(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
|
||||
current->trc_reader_special.b.need_mb = true;
|
||||
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
}
|
||||
|
||||
/*
 * Turn off heavyweight RCU tasks trace readers on idle/user exit by
 * clearing current->trc_reader_special.b.need_mb.  Only done when
 * CONFIG_TASKS_TRACE_RCU_READ_MB is enabled; the whole body is compiled
 * out without CONFIG_TASKS_TRACE_RCU.
 */
|
||||
static __always_inline void rcu_dynticks_task_trace_exit(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
|
||||
current->trc_reader_special.b.need_mb = false;
|
||||
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
}
|
||||
|
||||
/*
|
||||
* Record entry into an extended quiescent state. This is only to be
|
||||
* called when not already in an extended quiescent state, that is,
|
||||
* RCU is watching prior to the call to this function and is no longer
|
||||
* watching upon return.
|
||||
*/
|
||||
static noinstr void ct_kernel_exit_state(int offset)
|
||||
{
|
||||
/*
 * @offset is handed unchanged to ct_state_inc().  Callers in this file
 * pass RCU_DYNTICKS_IDX, either alone (ct_nmi_exit()) or with a context
 * state added to it (via ct_kernel_exit()).
 */
int seq;
|
||||
|
||||
/*
|
||||
* CPUs seeing atomic_add_return() must see prior RCU read-side
|
||||
* critical sections, and we also must force ordering with the
|
||||
* next idle sojourn.
|
||||
*/
|
||||
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
|
||||
// seq is whatever ct_state_inc() returns; the debug check below expects
// its RCU_DYNTICKS_IDX bit to be clear at this point.
seq = ct_state_inc(offset);
|
||||
// RCU is no longer watching. Better be in extended quiescent state!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
|
||||
}
|
||||
|
||||
/*
|
||||
* Record exit from an extended quiescent state. This is only to be
|
||||
* called from an extended quiescent state, that is, RCU is not watching
|
||||
* prior to the call to this function and is watching upon return.
|
||||
*/
|
||||
static noinstr void ct_kernel_enter_state(int offset)
|
||||
{
|
||||
/*
 * @offset is handed unchanged to ct_state_inc().  Callers in this file
 * pass RCU_DYNTICKS_IDX, either alone (ct_nmi_enter()) or with a context
 * state subtracted from it (via ct_kernel_enter()).
 */
int seq;
|
||||
|
||||
/*
|
||||
* CPUs seeing atomic_add_return() must see prior idle sojourns,
|
||||
* and we also must force ordering with the next RCU read-side
|
||||
* critical section.
|
||||
*/
|
||||
// seq is whatever ct_state_inc() returns; the debug check below expects
// its RCU_DYNTICKS_IDX bit to be set at this point.
seq = ct_state_inc(offset);
|
||||
// RCU is now watching. Better not be in an extended quiescent state!
|
||||
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
|
||||
}
|
||||
|
||||
/*
|
||||
* Enter an RCU extended quiescent state, which can be either the
|
||||
* idle loop or adaptive-tickless usermode execution.
|
||||
*
|
||||
* We crowbar the ->dynticks_nmi_nesting field to zero to allow for
|
||||
* the possibility of usermode upcalls having messed up our count
|
||||
* of interrupt nesting level during the prior busy period.
|
||||
*/
|
||||
static void noinstr ct_kernel_exit(bool user, int offset)
|
||||
{
|
||||
/*
 * @user:   true when called for a user/guest entry (__ct_user_enter()),
 *          false when called for idle (ct_idle_enter()).  Only consulted
 *          by the CONFIG_RCU_EQS_DEBUG idle-task check below.
 * @offset: passed unchanged to ct_kernel_exit_state().
 */
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
|
||||
// The irq/NMI counter is expected to sit at DYNTICK_IRQ_NONIDLE here;
// it is forced to zero regardless.
WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
|
||||
WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
|
||||
// A zero process-level count would mean RCU already stopped watching.
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
ct_dynticks_nesting() == 0);
|
||||
if (ct_dynticks_nesting() != 1) {
|
||||
// RCU will still be watching, so just do accounting and leave.
|
||||
ct->dynticks_nesting--;
|
||||
return;
|
||||
}
|
||||
|
||||
// Count is exactly 1: this is the outermost exit, so the extended
// quiescent state is really entered below.
instrumentation_begin();
|
||||
lockdep_assert_irqs_disabled();
|
||||
trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
rcu_preempt_deferred_qs(current);
|
||||
|
||||
// instrumentation for the noinstr ct_kernel_exit_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
|
||||
// RCU is watching here ...
|
||||
ct_kernel_exit_state(offset);
|
||||
// ... but is no longer watching here.
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
|
||||
/*
|
||||
* Exit an RCU extended quiescent state, which can be either the
|
||||
* idle loop or adaptive-tickless usermode execution.
|
||||
*
|
||||
* We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
|
||||
* allow for the possibility of usermode upcalls messing up our count of
|
||||
* interrupt nesting level during the busy period that is just now starting.
|
||||
*/
|
||||
static void noinstr ct_kernel_enter(bool user, int offset)
|
||||
{
|
||||
/*
 * @user:   false when called for idle exit (ct_idle_exit()); only
 *          consulted by the CONFIG_RCU_EQS_DEBUG idle-task check below.
 * @offset: passed unchanged to ct_kernel_enter_state().
 */
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
long oldval;
|
||||
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
|
||||
oldval = ct_dynticks_nesting();
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
|
||||
if (oldval) {
|
||||
// RCU was already watching, so just do accounting and leave.
|
||||
ct->dynticks_nesting++;
|
||||
return;
|
||||
}
|
||||
// Count was zero: this call really leaves the extended quiescent state.
rcu_dynticks_task_exit();
|
||||
// RCU is not watching here ...
|
||||
ct_kernel_enter_state(offset);
|
||||
// ... but is watching here.
|
||||
instrumentation_begin();
|
||||
|
||||
// instrumentation for the noinstr ct_kernel_enter_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
|
||||
trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
WRITE_ONCE(ct->dynticks_nesting, 1);
|
||||
// ct_kernel_exit() zeroed the irq/NMI counter; complain if it is no
// longer zero, then put it back to its non-idle baseline.
WARN_ON_ONCE(ct_dynticks_nmi_nesting());
|
||||
WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/**
|
||||
* ct_nmi_exit - inform RCU of exit from NMI context
|
||||
*
|
||||
* If we are returning from the outermost NMI handler that interrupted an
|
||||
* RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
|
||||
* to let the RCU grace-period handling know that the CPU is back to
|
||||
* being RCU-idle.
|
||||
*
|
||||
* If you add or remove a call to ct_nmi_exit(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void noinstr ct_nmi_exit(void)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
|
||||
instrumentation_begin();
|
||||
/*
|
||||
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
|
||||
* (We are exiting an NMI handler, so RCU better be paying attention
|
||||
* to us!)
|
||||
*/
|
||||
WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
|
||||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
|
||||
|
||||
/*
|
||||
* If the nesting level is not 1, the CPU wasn't RCU-idle, so
|
||||
* leave it in non-RCU-idle state.
|
||||
*/
|
||||
if (ct_dynticks_nmi_nesting() != 1) {
|
||||
// Undo the +2 that ct_nmi_enter() applies to entries that did
// not interrupt an RCU-idle period.
trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
|
||||
ct_dynticks());
|
||||
WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
|
||||
ct_dynticks_nmi_nesting() - 2);
|
||||
instrumentation_end();
|
||||
return;
|
||||
}
|
||||
|
||||
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
|
||||
trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
|
||||
WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
|
||||
|
||||
// instrumentation for the noinstr ct_kernel_exit_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
instrumentation_end();
|
||||
|
||||
// RCU is watching here ...
|
||||
ct_kernel_exit_state(RCU_DYNTICKS_IDX);
|
||||
// ... but is no longer watching here.
|
||||
|
||||
// This function is also reached through ct_irq_exit(); the task
// bookkeeping is skipped when in_nmi() reports a real NMI.
if (!in_nmi())
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
|
||||
/**
|
||||
* ct_nmi_enter - inform RCU of entry to NMI context
|
||||
*
|
||||
* If the CPU was idle from RCU's viewpoint, update ct->state and
|
||||
* ct->dynticks_nmi_nesting to let the RCU grace-period handling know
|
||||
* that the CPU is active. This implementation permits nested NMIs, as
|
||||
* long as the nesting level does not overflow an int. (You will probably
|
||||
* run out of stack space first.)
|
||||
*
|
||||
* If you add or remove a call to ct_nmi_enter(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void noinstr ct_nmi_enter(void)
|
||||
{
|
||||
long incby = 2;
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
|
||||
/* Complain about underflow. */
|
||||
WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);
|
||||
|
||||
/*
|
||||
* If idle from RCU viewpoint, atomically increment ->dynticks
|
||||
* to mark non-idle and increment ->dynticks_nmi_nesting by one.
|
||||
* Otherwise, increment ->dynticks_nmi_nesting by two. This means
|
||||
* if ->dynticks_nmi_nesting is equal to one, we are guaranteed
|
||||
* to be in the outermost NMI handler that interrupted an RCU-idle
|
||||
* period (observation due to Andy Lutomirski).
|
||||
*/
|
||||
if (rcu_dynticks_curr_cpu_in_eqs()) {
|
||||
|
||||
if (!in_nmi())
|
||||
rcu_dynticks_task_exit();
|
||||
|
||||
// RCU is not watching here ...
|
||||
ct_kernel_enter_state(RCU_DYNTICKS_IDX);
|
||||
// ... but is watching here.
|
||||
|
||||
instrumentation_begin();
|
||||
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
|
||||
instrument_atomic_read(&ct->state, sizeof(ct->state));
|
||||
// instrumentation for the noinstr ct_kernel_enter_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
|
||||
incby = 1;
|
||||
} else if (!in_nmi()) {
|
||||
instrumentation_begin();
|
||||
rcu_irq_enter_check_tick();
|
||||
} else {
|
||||
instrumentation_begin();
|
||||
}
|
||||
|
||||
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
|
||||
ct_dynticks_nmi_nesting(),
|
||||
ct_dynticks_nmi_nesting() + incby, ct_dynticks());
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
|
||||
ct_dynticks_nmi_nesting() + incby);
|
||||
barrier();
|
||||
}
|
||||
|
||||
/**
|
||||
* ct_idle_enter - inform RCU that current CPU is entering idle
|
||||
*
|
||||
* Enter idle mode, in other words, -leave- the mode in which RCU
|
||||
* read-side critical sections can occur. (Though RCU read-side
|
||||
* critical sections can occur in irq handlers in idle, a possibility
|
||||
* handled by irq_enter() and irq_exit().)
|
||||
*
|
||||
* If you add or remove a call to ct_idle_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void noinstr ct_idle_enter(void)
{
	/* State-word offset for a kernel -> idle transition. */
	const int idle_offset = RCU_DYNTICKS_IDX + CONTEXT_IDLE;

	/* Debug builds complain when interrupts are still enabled here. */
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
	ct_kernel_exit(false, idle_offset);
}
EXPORT_SYMBOL_GPL(ct_idle_enter);
|
||||
|
||||
/**
|
||||
* ct_idle_exit - inform RCU that current CPU is leaving idle
|
||||
*
|
||||
* Exit idle mode, in other words, -enter- the mode in which RCU
|
||||
* read-side critical sections can occur.
|
||||
*
|
||||
* If you add or remove a call to ct_idle_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void noinstr ct_idle_exit(void)
{
	unsigned long irq_state;

	/*
	 * ct_kernel_enter() checks (under CONFIG_RCU_EQS_DEBUG) that
	 * interrupts are off, so disable them around the call.
	 */
	raw_local_irq_save(irq_state);
	ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
	raw_local_irq_restore(irq_state);
}
EXPORT_SYMBOL_GPL(ct_idle_exit);
|
||||
|
||||
/**
|
||||
* ct_irq_enter - inform RCU that current CPU is entering irq away from idle
|
||||
*
|
||||
* Enter an interrupt handler, which might possibly result in exiting
|
||||
* idle mode, in other words, entering the mode in which read-side critical
|
||||
* sections can occur. The caller must have disabled interrupts.
|
||||
*
|
||||
* Note that the Linux kernel is fully capable of entering an interrupt
|
||||
* handler that it never exits, for example when doing upcalls to user mode!
|
||||
* This code assumes that the idle loop never does upcalls to user mode.
|
||||
* If your architecture's idle loop does do upcalls to user mode (or does
|
||||
* anything else that results in unbalanced calls to the irq_enter() and
|
||||
* irq_exit() functions), RCU will give you what you deserve, good and hard.
|
||||
* But very infrequently and irreproducibly.
|
||||
*
|
||||
* Use things like work queues to work around this limitation.
|
||||
*
|
||||
* You have been warned.
|
||||
*
|
||||
* If you add or remove a call to ct_irq_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
noinstr void ct_irq_enter(void)
|
||||
{
|
||||
// The caller must already have interrupts disabled; lockdep checks it.
lockdep_assert_irqs_disabled();
|
||||
// Irq entry shares the NMI entry bookkeeping.
ct_nmi_enter();
|
||||
}
|
||||
|
||||
/**
|
||||
* ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
|
||||
*
|
||||
* Exit from an interrupt handler, which might possibly result in entering
|
||||
* idle mode, in other words, leaving the mode in which read-side critical
|
||||
* sections can occur. The caller must have disabled interrupts.
|
||||
*
|
||||
* This code assumes that the idle loop never does anything that might
|
||||
* result in unbalanced calls to irq_enter() and irq_exit(). If your
|
||||
* architecture's idle loop violates this assumption, RCU will give you what
|
||||
* you deserve, good and hard. But very infrequently and irreproducibly.
|
||||
*
|
||||
* Use things like work queues to work around this limitation.
|
||||
*
|
||||
* You have been warned.
|
||||
*
|
||||
* If you add or remove a call to ct_irq_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
noinstr void ct_irq_exit(void)
|
||||
{
|
||||
// The caller must already have interrupts disabled; lockdep checks it.
lockdep_assert_irqs_disabled();
|
||||
// Irq exit shares the NMI exit bookkeeping.
ct_nmi_exit();
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper for ct_irq_enter() where interrupts are enabled.
|
||||
*
|
||||
* If you add or remove a call to ct_irq_enter_irqson(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void ct_irq_enter_irqson(void)
{
	unsigned long saved_flags;

	/* ct_irq_enter() asserts that interrupts are off; provide that here. */
	local_irq_save(saved_flags);
	ct_irq_enter();
	local_irq_restore(saved_flags);
}
|
||||
|
||||
/*
|
||||
* Wrapper for ct_irq_exit() where interrupts are enabled.
|
||||
*
|
||||
* If you add or remove a call to ct_irq_exit_irqson(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void ct_irq_exit_irqson(void)
{
	unsigned long saved_flags;

	/* ct_irq_exit() asserts that interrupts are off; provide that here. */
	local_irq_save(saved_flags);
	ct_irq_exit();
	local_irq_restore(saved_flags);
}
|
||||
#else
|
||||
/* No-op stand-ins used when CONFIG_CONTEXT_TRACKING_IDLE is not set. */
static __always_inline void ct_kernel_exit(bool user, int offset) { }
|
||||
static __always_inline void ct_kernel_enter(bool user, int offset) { }
|
||||
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/context_tracking.h>
|
||||
@ -28,9 +435,6 @@
|
||||
DEFINE_STATIC_KEY_FALSE(context_tracking_key);
|
||||
EXPORT_SYMBOL_GPL(context_tracking_key);
|
||||
|
||||
DEFINE_PER_CPU(struct context_tracking, context_tracking);
|
||||
EXPORT_SYMBOL_GPL(context_tracking);
|
||||
|
||||
static noinstr bool context_tracking_recursion_enter(void)
|
||||
{
|
||||
int recursion;
|
||||
@ -51,29 +455,32 @@ static __always_inline void context_tracking_recursion_exit(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* context_tracking_enter - Inform the context tracking that the CPU is going
|
||||
* enter user or guest space mode.
|
||||
* __ct_user_enter - Inform the context tracking that the CPU is going
|
||||
* to enter user or guest space mode.
|
||||
*
|
||||
* This function must be called right before we switch from the kernel
|
||||
* to user or guest space, when it's guaranteed the remaining kernel
|
||||
* instructions to execute won't use any RCU read side critical section
|
||||
* because this function sets RCU in extended quiescent state.
|
||||
*/
|
||||
void noinstr __context_tracking_enter(enum ctx_state state)
|
||||
void noinstr __ct_user_enter(enum ctx_state state)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Kernel threads aren't supposed to go to userspace */
|
||||
WARN_ON_ONCE(!current->mm);
|
||||
|
||||
if (!context_tracking_recursion_enter())
|
||||
return;
|
||||
|
||||
if ( __this_cpu_read(context_tracking.state) != state) {
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
if (__ct_state() != state) {
|
||||
if (ct->active) {
|
||||
/*
|
||||
* At this stage, only low level arch entry code remains and
|
||||
* then we'll run in userspace. We can assume there won't be
|
||||
* any RCU read-side critical section until the next call to
|
||||
* user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
|
||||
* user_exit() or ct_irq_enter(). Let's remove RCU's dependency
|
||||
* on the tick.
|
||||
*/
|
||||
if (state == CONTEXT_USER) {
|
||||
@ -82,35 +489,77 @@ void noinstr __context_tracking_enter(enum ctx_state state)
|
||||
vtime_user_enter(current);
|
||||
instrumentation_end();
|
||||
}
|
||||
rcu_user_enter();
|
||||
/*
|
||||
* Other than generic entry implementation, we may be past the last
|
||||
* rescheduling opportunity in the entry code. Trigger a self IPI
|
||||
* that will fire and reschedule once we resume in user/guest mode.
|
||||
*/
|
||||
rcu_irq_work_resched();
|
||||
|
||||
/*
|
||||
* Enter RCU idle mode right before resuming userspace. No use of RCU
|
||||
* is permitted between this call and rcu_eqs_exit(). This way the
|
||||
* CPU doesn't need to maintain the tick for RCU maintenance purposes
|
||||
* when the CPU runs in userspace.
|
||||
*/
|
||||
ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);
|
||||
|
||||
/*
|
||||
* Special case if we only track user <-> kernel transitions for tickless
|
||||
* cputime accounting but we don't support RCU extended quiescent state.
|
||||
* In this case we don't care about any concurrency/ordering.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
|
||||
atomic_set(&ct->state, state);
|
||||
} else {
|
||||
/*
|
||||
* Even if context tracking is disabled on this CPU, because it's outside
|
||||
* the full dynticks mask for example, we still have to keep track of the
|
||||
* context transitions and states to prevent inconsistency on those of
|
||||
* other CPUs.
|
||||
* If a task triggers an exception in userspace, sleep on the exception
|
||||
* handler and then migrate to another CPU, that new CPU must know where
|
||||
* the exception returns by the time we call exception_exit().
|
||||
* This information can only be provided by the previous CPU when it called
|
||||
* exception_enter().
|
||||
* OTOH we can spare the calls to vtime and RCU when context_tracking.active
|
||||
* is false because we know that CPU is not tickless.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
|
||||
/* Tracking for vtime only, no concurrent RCU EQS accounting */
|
||||
atomic_set(&ct->state, state);
|
||||
} else {
|
||||
/*
|
||||
* Tracking for vtime and RCU EQS. Make sure we don't race
|
||||
* with NMIs. OTOH we don't care about ordering here since
|
||||
* RCU only requires RCU_DYNTICKS_IDX increments to be fully
|
||||
* ordered.
|
||||
*/
|
||||
atomic_add(state, &ct->state);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Even if context tracking is disabled on this CPU, because it's outside
|
||||
* the full dynticks mask for example, we still have to keep track of the
|
||||
* context transitions and states to prevent inconsistency on those of
|
||||
* other CPUs.
|
||||
* If a task triggers an exception in userspace, sleep on the exception
|
||||
* handler and then migrate to another CPU, that new CPU must know where
|
||||
* the exception returns by the time we call exception_exit().
|
||||
* This information can only be provided by the previous CPU when it called
|
||||
* exception_enter().
|
||||
* OTOH we can spare the calls to vtime and RCU when context_tracking.active
|
||||
* is false because we know that CPU is not tickless.
|
||||
*/
|
||||
__this_cpu_write(context_tracking.state, state);
|
||||
}
|
||||
context_tracking_recursion_exit();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__context_tracking_enter);
|
||||
EXPORT_SYMBOL_GPL(__ct_user_enter);
|
||||
|
||||
void context_tracking_enter(enum ctx_state state)
|
||||
/*
|
||||
* OBSOLETE:
|
||||
* This function should be noinstr but the below local_irq_restore() is
|
||||
* unsafe because it involves illegal RCU uses through tracing and lockdep.
|
||||
* This is unlikely to be fixed as this function is obsolete. The preferred
|
||||
* way is to call __ct_user_enter() through user_enter_irqoff()
|
||||
* or context_tracking_guest_enter(). It should be the arch entry code
|
||||
* responsibility to call into context tracking with IRQs disabled.
|
||||
*/
|
||||
void ct_user_enter(enum ctx_state state)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Some contexts may involve an exception occurring in an irq,
|
||||
* leading to that nesting:
|
||||
* rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
|
||||
* ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
|
||||
* This would mess up the dyntick_nesting count though. And rcu_irq_*()
|
||||
* helpers are enough to protect RCU uses inside the exception. So
|
||||
* just return immediately if we detect we are in an IRQ.
|
||||
@ -119,21 +568,32 @@ void context_tracking_enter(enum ctx_state state)
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
__context_tracking_enter(state);
|
||||
__ct_user_enter(state);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(context_tracking_enter);
|
||||
EXPORT_SYMBOL_GPL(context_tracking_enter);
|
||||
NOKPROBE_SYMBOL(ct_user_enter);
|
||||
EXPORT_SYMBOL_GPL(ct_user_enter);
|
||||
|
||||
void context_tracking_user_enter(void)
|
||||
/**
|
||||
* user_enter_callable() - Unfortunate ASM callable version of user_enter() for
|
||||
* archs that didn't manage to check the context tracking
|
||||
* static key from low level code.
|
||||
*
|
||||
* This OBSOLETE function should be noinstr but it unsafely calls
|
||||
* local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
|
||||
* This is unlikely to be fixed as this function is obsolete. The preferred
|
||||
* way is to call user_enter_irqoff(). It should be the arch entry code
|
||||
* responsibility to call into context tracking with IRQs disabled.
|
||||
*/
|
||||
void user_enter_callable(void)
|
||||
{
|
||||
user_enter();
|
||||
}
|
||||
NOKPROBE_SYMBOL(context_tracking_user_enter);
|
||||
NOKPROBE_SYMBOL(user_enter_callable);
|
||||
|
||||
/**
|
||||
* context_tracking_exit - Inform the context tracking that the CPU is
|
||||
* exiting user or guest mode and entering the kernel.
|
||||
* __ct_user_exit - Inform the context tracking that the CPU is
|
||||
* exiting user or guest mode and entering the kernel.
|
||||
*
|
||||
* This function must be called after we entered the kernel from user or
|
||||
* guest space before any use of RCU read side critical section. This
|
||||
@ -143,32 +603,64 @@ NOKPROBE_SYMBOL(context_tracking_user_enter);
|
||||
* This call supports re-entrancy. This way it can be called from any exception
|
||||
* handler without needing to know if we came from userspace or not.
|
||||
*/
|
||||
void noinstr __context_tracking_exit(enum ctx_state state)
|
||||
void noinstr __ct_user_exit(enum ctx_state state)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
|
||||
if (!context_tracking_recursion_enter())
|
||||
return;
|
||||
|
||||
if (__this_cpu_read(context_tracking.state) == state) {
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
if (__ct_state() == state) {
|
||||
if (ct->active) {
|
||||
/*
|
||||
* We are going to run code that may use RCU. Inform
|
||||
* RCU core about that (ie: we may need the tick again).
|
||||
* Exit RCU idle mode while entering the kernel because it can
|
||||
* run an RCU read-side critical section at any time.
|
||||
*/
|
||||
rcu_user_exit();
|
||||
ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
|
||||
if (state == CONTEXT_USER) {
|
||||
instrumentation_begin();
|
||||
vtime_user_exit(current);
|
||||
trace_user_exit(0);
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/*
|
||||
* Special case if we only track user <-> kernel transitions for tickless
|
||||
* cputime accounting but we don't support RCU extended quiescent state.
|
||||
* In this case we don't care about any concurrency/ordering.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
|
||||
atomic_set(&ct->state, CONTEXT_KERNEL);
|
||||
|
||||
} else {
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
|
||||
/* Tracking for vtime only, no concurrent RCU EQS accounting */
|
||||
atomic_set(&ct->state, CONTEXT_KERNEL);
|
||||
} else {
|
||||
/*
|
||||
* Tracking for vtime and RCU EQS. Make sure we don't race
|
||||
* with NMIs. OTOH we don't care about ordering here since
|
||||
* RCU only requires RCU_DYNTICKS_IDX increments to be fully
|
||||
* ordered.
|
||||
*/
|
||||
atomic_sub(state, &ct->state);
|
||||
}
|
||||
}
|
||||
__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
|
||||
}
|
||||
context_tracking_recursion_exit();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__context_tracking_exit);
|
||||
EXPORT_SYMBOL_GPL(__ct_user_exit);
|
||||
|
||||
void context_tracking_exit(enum ctx_state state)
|
||||
/*
|
||||
* OBSOLETE:
|
||||
* This function should be noinstr but the below local_irq_save() is
|
||||
* unsafe because it involves illegal RCU uses through tracing and lockdep.
|
||||
* This is unlikely to be fixed as this function is obsolete. The preferred
|
||||
* way is to call __ct_user_exit() through user_exit_irqoff()
|
||||
* or context_tracking_guest_exit(). It should be the arch entry code
|
||||
* responsibility to call into context tracking with IRQs disabled.
|
||||
*/
|
||||
void ct_user_exit(enum ctx_state state)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@ -176,19 +668,30 @@ void context_tracking_exit(enum ctx_state state)
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
__context_tracking_exit(state);
|
||||
__ct_user_exit(state);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(context_tracking_exit);
|
||||
EXPORT_SYMBOL_GPL(context_tracking_exit);
|
||||
NOKPROBE_SYMBOL(ct_user_exit);
|
||||
EXPORT_SYMBOL_GPL(ct_user_exit);
|
||||
|
||||
void context_tracking_user_exit(void)
|
||||
/**
|
||||
* user_exit_callable() - Unfortunate ASM callable version of user_exit() for
|
||||
* archs that didn't manage to check the context tracking
|
||||
* static key from low level code.
|
||||
*
|
||||
* This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
|
||||
* involving illegal RCU uses through tracing and lockdep. This is unlikely
|
||||
* to be fixed as this function is obsolete. The preferred way is to call
|
||||
* user_exit_irqoff(). It should be the arch entry code responsibility to
|
||||
* call into context tracking with IRQs disabled.
|
||||
*/
|
||||
void user_exit_callable(void)
|
||||
{
|
||||
user_exit();
|
||||
}
|
||||
NOKPROBE_SYMBOL(context_tracking_user_exit);
|
||||
NOKPROBE_SYMBOL(user_exit_callable);
|
||||
|
||||
void __init context_tracking_cpu_set(int cpu)
|
||||
void __init ct_cpu_track_user(int cpu)
|
||||
{
|
||||
static __initdata bool initialized = false;
|
||||
|
||||
@ -212,12 +715,14 @@ void __init context_tracking_cpu_set(int cpu)
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
|
||||
void __init context_tracking_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
context_tracking_cpu_set(cpu);
|
||||
ct_cpu_track_user(cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
|
||||
|
@ -35,11 +35,11 @@ static int cpu_pm_notify(enum cpu_pm_event event)
|
||||
* dysfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
|
||||
* this.
|
||||
*/
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
rcu_read_lock();
|
||||
ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
|
||||
rcu_read_unlock();
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
|
||||
return notifier_to_errno(ret);
|
||||
}
|
||||
@ -49,11 +49,11 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
|
||||
ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
|
||||
raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
|
||||
return notifier_to_errno(ret);
|
||||
}
|
||||
|
@ -321,7 +321,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
||||
}
|
||||
|
||||
/*
|
||||
* If this entry hit the idle task invoke rcu_irq_enter() whether
|
||||
* If this entry hit the idle task invoke ct_irq_enter() whether
|
||||
* RCU is watching or not.
|
||||
*
|
||||
* Interrupts can nest when the first interrupt invokes softirq
|
||||
@ -332,12 +332,12 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
||||
* not nested into another interrupt.
|
||||
*
|
||||
* Checking for rcu_is_watching() here would prevent the nesting
|
||||
* interrupt to invoke rcu_irq_enter(). If that nested interrupt is
|
||||
* interrupt to invoke ct_irq_enter(). If that nested interrupt is
|
||||
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
|
||||
* assume that it is the first interrupt and eventually claim
|
||||
* quiescent state and end grace periods prematurely.
|
||||
*
|
||||
* Unconditionally invoke rcu_irq_enter() so RCU state stays
|
||||
* Unconditionally invoke ct_irq_enter() so RCU state stays
|
||||
* consistent.
|
||||
*
|
||||
* TINY_RCU does not support EQS, so let the compiler eliminate
|
||||
@ -350,7 +350,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
||||
* as in irqentry_enter_from_user_mode().
|
||||
*/
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
rcu_irq_enter();
|
||||
ct_irq_enter();
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
@ -418,7 +418,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare();
|
||||
instrumentation_end();
|
||||
rcu_irq_exit();
|
||||
ct_irq_exit();
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
return;
|
||||
}
|
||||
@ -436,7 +436,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
||||
* was not watching on entry.
|
||||
*/
|
||||
if (state.exit_rcu)
|
||||
rcu_irq_exit();
|
||||
ct_irq_exit();
|
||||
}
|
||||
}
|
||||
|
||||
@ -449,7 +449,7 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
|
||||
__nmi_enter();
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
lockdep_hardirq_enter();
|
||||
rcu_nmi_enter();
|
||||
ct_nmi_enter();
|
||||
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
@ -469,7 +469,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
|
||||
}
|
||||
instrumentation_end();
|
||||
|
||||
rcu_nmi_exit();
|
||||
ct_nmi_exit();
|
||||
lockdep_hardirq_exit();
|
||||
if (irq_state.lockdep)
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
|
@ -114,7 +114,7 @@ int kernel_text_address(unsigned long addr)
|
||||
|
||||
/* Treat this like an NMI as it can happen anywhere */
|
||||
if (no_rcu)
|
||||
rcu_nmi_enter();
|
||||
ct_nmi_enter();
|
||||
|
||||
if (is_module_text_address(addr))
|
||||
goto out;
|
||||
@ -127,7 +127,7 @@ int kernel_text_address(unsigned long addr)
|
||||
ret = 0;
|
||||
out:
|
||||
if (no_rcu)
|
||||
rcu_nmi_exit();
|
||||
ct_nmi_exit();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -6570,7 +6570,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
|
||||
|
||||
/*
|
||||
* If a CPU is in the RCU-free window in idle (ie: in the section
|
||||
* between rcu_idle_enter() and rcu_idle_exit(), then RCU
|
||||
* between ct_idle_enter() and ct_idle_exit(), then RCU
|
||||
* considers that CPU to be in an "extended quiescent state",
|
||||
* which means that RCU will be completely ignoring that CPU.
|
||||
* Therefore, rcu_read_lock() and friends have absolutely no
|
||||
|
@ -8,6 +8,8 @@ menu "RCU Subsystem"
|
||||
config TREE_RCU
|
||||
bool
|
||||
default y if SMP
|
||||
# Dynticks-idle tracking
|
||||
select CONTEXT_TRACKING_IDLE
|
||||
help
|
||||
This option selects the RCU implementation that is
|
||||
designed for very large SMP systems with hundreds or
|
||||
|
@ -12,10 +12,6 @@
|
||||
|
||||
#include <trace/events/rcu.h>
|
||||
|
||||
/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
|
||||
#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)
|
||||
|
||||
|
||||
/*
|
||||
* Grace-period counter management.
|
||||
*/
|
||||
|
@ -62,6 +62,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include "../time/tick-internal.h"
|
||||
|
||||
#include "tree.h"
|
||||
@ -75,9 +76,6 @@
|
||||
/* Data structures. */
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
|
||||
.dynticks_nesting = 1,
|
||||
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
|
||||
.dynticks = ATOMIC_INIT(1),
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
.cblist.flags = SEGCBLIST_RCU_CORE,
|
||||
#endif
|
||||
@ -266,56 +264,6 @@ void rcu_softirq_qs(void)
|
||||
rcu_tasks_qs(current, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the current CPU's rcu_data structure's ->dynticks field
|
||||
* with ordering. Return the new value.
|
||||
*/
|
||||
static noinline noinstr unsigned long rcu_dynticks_inc(int incby)
|
||||
{
|
||||
return arch_atomic_add_return(incby, this_cpu_ptr(&rcu_data.dynticks));
|
||||
}
|
||||
|
||||
/*
|
||||
* Record entry into an extended quiescent state. This is only to be
|
||||
* called when not already in an extended quiescent state, that is,
|
||||
* RCU is watching prior to the call to this function and is no longer
|
||||
* watching upon return.
|
||||
*/
|
||||
static noinstr void rcu_dynticks_eqs_enter(void)
|
||||
{
|
||||
int seq;
|
||||
|
||||
/*
|
||||
* CPUs seeing atomic_add_return() must see prior RCU read-side
|
||||
* critical sections, and we also must force ordering with the
|
||||
* next idle sojourn.
|
||||
*/
|
||||
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
|
||||
seq = rcu_dynticks_inc(1);
|
||||
// RCU is no longer watching. Better be in extended quiescent state!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Record exit from an extended quiescent state. This is only to be
|
||||
* called from an extended quiescent state, that is, RCU is not watching
|
||||
* prior to the call to this function and is watching upon return.
|
||||
*/
|
||||
static noinstr void rcu_dynticks_eqs_exit(void)
|
||||
{
|
||||
int seq;
|
||||
|
||||
/*
|
||||
* CPUs seeing atomic_add_return() must see prior idle sojourns,
|
||||
* and we also must force ordering with the next RCU read-side
|
||||
* critical section.
|
||||
*/
|
||||
seq = rcu_dynticks_inc(1);
|
||||
// RCU is now watching. Better not be in an extended quiescent state!
|
||||
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the current CPU's ->dynticks counter to indicate that the
|
||||
* newly onlined CPU is no longer in an extended quiescent state.
|
||||
@ -328,31 +276,19 @@ static noinstr void rcu_dynticks_eqs_exit(void)
|
||||
*/
|
||||
static void rcu_dynticks_eqs_online(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
if (atomic_read(&rdp->dynticks) & 0x1)
|
||||
if (ct_dynticks() & RCU_DYNTICKS_IDX)
|
||||
return;
|
||||
rcu_dynticks_inc(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the current CPU in an extended quiescent state?
|
||||
*
|
||||
* No ordering, as we are sampling CPU-local information.
|
||||
*/
|
||||
static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
|
||||
{
|
||||
return !(arch_atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1);
|
||||
ct_state_inc(RCU_DYNTICKS_IDX);
|
||||
}
|
||||
|
||||
/*
|
||||
* Snapshot the ->dynticks counter with full ordering so as to allow
|
||||
* stable comparison of this counter with past and future snapshots.
|
||||
*/
|
||||
static int rcu_dynticks_snap(struct rcu_data *rdp)
|
||||
static int rcu_dynticks_snap(int cpu)
|
||||
{
|
||||
smp_mb(); // Fundamental RCU ordering guarantee.
|
||||
return atomic_read_acquire(&rdp->dynticks);
|
||||
return ct_dynticks_cpu_acquire(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -361,15 +297,13 @@ static int rcu_dynticks_snap(struct rcu_data *rdp)
|
||||
*/
|
||||
static bool rcu_dynticks_in_eqs(int snap)
|
||||
{
|
||||
return !(snap & 0x1);
|
||||
return !(snap & RCU_DYNTICKS_IDX);
|
||||
}
|
||||
|
||||
/* Return true if the specified CPU is currently idle from an RCU viewpoint. */
|
||||
bool rcu_is_idle_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
|
||||
return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
|
||||
return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -379,7 +313,7 @@ bool rcu_is_idle_cpu(int cpu)
|
||||
*/
|
||||
static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
|
||||
{
|
||||
return snap != rcu_dynticks_snap(rdp);
|
||||
return snap != rcu_dynticks_snap(rdp->cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -388,19 +322,17 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
|
||||
*/
|
||||
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
int snap;
|
||||
|
||||
// If not quiescent, force back to earlier extended quiescent state.
|
||||
snap = atomic_read(&rdp->dynticks) & ~0x1;
|
||||
|
||||
snap = ct_dynticks_cpu(cpu) & ~RCU_DYNTICKS_IDX;
|
||||
smp_rmb(); // Order ->dynticks and *vp reads.
|
||||
if (READ_ONCE(*vp))
|
||||
return false; // Non-zero, so report failure;
|
||||
smp_rmb(); // Order *vp read and ->dynticks re-read.
|
||||
|
||||
// If still in the same extended quiescent state, we are good!
|
||||
return snap == atomic_read(&rdp->dynticks);
|
||||
return snap == ct_dynticks_cpu(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -419,9 +351,9 @@ notrace void rcu_momentary_dyntick_idle(void)
|
||||
int seq;
|
||||
|
||||
raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
|
||||
seq = rcu_dynticks_inc(2);
|
||||
seq = ct_state_inc(2 * RCU_DYNTICKS_IDX);
|
||||
/* It is illegal to call this from idle state. */
|
||||
WARN_ON_ONCE(!(seq & 0x1));
|
||||
WARN_ON_ONCE(!(seq & RCU_DYNTICKS_IDX));
|
||||
rcu_preempt_deferred_qs(current);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
|
||||
@ -446,13 +378,13 @@ static int rcu_is_cpu_rrupt_from_idle(void)
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Check for counter underflows */
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
|
||||
RCU_LOCKDEP_WARN(ct_dynticks_nesting() < 0,
|
||||
"RCU dynticks_nesting counter underflow!");
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
|
||||
RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() <= 0,
|
||||
"RCU dynticks_nmi_nesting counter underflow/zero!");
|
||||
|
||||
/* Are we at first interrupt nesting level? */
|
||||
nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting);
|
||||
nesting = ct_dynticks_nmi_nesting();
|
||||
if (nesting > 1)
|
||||
return false;
|
||||
|
||||
@ -462,7 +394,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
|
||||
WARN_ON_ONCE(!nesting && !is_idle_task(current));
|
||||
|
||||
/* Does CPU appear to be idle from an RCU standpoint? */
|
||||
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
|
||||
return ct_dynticks_nesting() == 0;
|
||||
}
|
||||
|
||||
#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
|
||||
@ -613,66 +545,7 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
|
||||
|
||||
/*
|
||||
* Enter an RCU extended quiescent state, which can be either the
|
||||
* idle loop or adaptive-tickless usermode execution.
|
||||
*
|
||||
* We crowbar the ->dynticks_nmi_nesting field to zero to allow for
|
||||
* the possibility of usermode upcalls having messed up our count
|
||||
* of interrupt nesting level during the prior busy period.
|
||||
*/
|
||||
static noinstr void rcu_eqs_enter(bool user)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE);
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
rdp->dynticks_nesting == 0);
|
||||
if (rdp->dynticks_nesting != 1) {
|
||||
// RCU will still be watching, so just do accounting and leave.
|
||||
rdp->dynticks_nesting--;
|
||||
return;
|
||||
}
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
instrumentation_begin();
|
||||
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
rcu_preempt_deferred_qs(current);
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
|
||||
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
|
||||
// RCU is watching here ...
|
||||
rcu_dynticks_eqs_enter();
|
||||
// ... but is no longer watching here.
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_idle_enter - inform RCU that current CPU is entering idle
|
||||
*
|
||||
* Enter idle mode, in other words, -leave- the mode in which RCU
|
||||
* read-side critical sections can occur. (Though RCU read-side
|
||||
* critical sections can occur in irq handlers in idle, a possibility
|
||||
* handled by irq_enter() and irq_exit().)
|
||||
*
|
||||
* If you add or remove a call to rcu_idle_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_idle_enter(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_eqs_enter(false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_idle_enter);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
|
||||
#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)
|
||||
#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
|
||||
/*
|
||||
* An empty function that will trigger a reschedule on
|
||||
* IRQ tail once IRQs get re-enabled on userspace/guest resume.
|
||||
@ -694,7 +567,7 @@ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
|
||||
* last resort is to fire a local irq_work that will trigger a reschedule once IRQs
|
||||
* get re-enabled again.
|
||||
*/
|
||||
noinstr static void rcu_irq_work_resched(void)
|
||||
noinstr void rcu_irq_work_resched(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
@ -710,114 +583,7 @@ noinstr static void rcu_irq_work_resched(void)
|
||||
}
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void rcu_irq_work_resched(void) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* rcu_user_enter - inform RCU that we are resuming userspace.
|
||||
*
|
||||
* Enter RCU idle mode right before resuming userspace. No use of RCU
|
||||
* is permitted between this call and rcu_user_exit(). This way the
|
||||
* CPU doesn't need to maintain the tick for RCU maintenance purposes
|
||||
* when the CPU runs in userspace.
|
||||
*
|
||||
* If you add or remove a call to rcu_user_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
noinstr void rcu_user_enter(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/*
|
||||
* Other than generic entry implementation, we may be past the last
|
||||
* rescheduling opportunity in the entry code. Trigger a self IPI
|
||||
* that will fire and reschedule once we resume in user/guest mode.
|
||||
*/
|
||||
rcu_irq_work_resched();
|
||||
rcu_eqs_enter(true);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_NO_HZ_FULL */
|
||||
|
||||
/**
|
||||
* rcu_nmi_exit - inform RCU of exit from NMI context
|
||||
*
|
||||
* If we are returning from the outermost NMI handler that interrupted an
|
||||
* RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting
|
||||
* to let the RCU grace-period handling know that the CPU is back to
|
||||
* being RCU-idle.
|
||||
*
|
||||
* If you add or remove a call to rcu_nmi_exit(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
noinstr void rcu_nmi_exit(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
instrumentation_begin();
|
||||
/*
|
||||
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
|
||||
* (We are exiting an NMI handler, so RCU better be paying attention
|
||||
* to us!)
|
||||
*/
|
||||
WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0);
|
||||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
|
||||
|
||||
/*
|
||||
* If the nesting level is not 1, the CPU wasn't RCU-idle, so
|
||||
* leave it in non-RCU-idle state.
|
||||
*/
|
||||
if (rdp->dynticks_nmi_nesting != 1) {
|
||||
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
|
||||
atomic_read(&rdp->dynticks));
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
|
||||
rdp->dynticks_nmi_nesting - 2);
|
||||
instrumentation_end();
|
||||
return;
|
||||
}
|
||||
|
||||
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
|
||||
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
|
||||
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
instrumentation_end();
|
||||
|
||||
// RCU is watching here ...
|
||||
rcu_dynticks_eqs_enter();
|
||||
// ... but is no longer watching here.
|
||||
|
||||
if (!in_nmi())
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
|
||||
*
|
||||
* Exit from an interrupt handler, which might possibly result in entering
|
||||
* idle mode, in other words, leaving the mode in which read-side critical
|
||||
* sections can occur. The caller must have disabled interrupts.
|
||||
*
|
||||
* This code assumes that the idle loop never does anything that might
|
||||
* result in unbalanced calls to irq_enter() and irq_exit(). If your
|
||||
* architecture's idle loop violates this assumption, RCU will give you what
|
||||
* you deserve, good and hard. But very infrequently and irreproducibly.
|
||||
*
|
||||
* Use things like work queues to work around this limitation.
|
||||
*
|
||||
* You have been warned.
|
||||
*
|
||||
* If you add or remove a call to rcu_irq_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void noinstr rcu_irq_exit(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_nmi_exit();
|
||||
}
|
||||
#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
/**
|
||||
@ -827,9 +593,9 @@ void rcu_irq_exit_check_preempt(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
|
||||
RCU_LOCKDEP_WARN(ct_dynticks_nesting() <= 0,
|
||||
"RCU dynticks_nesting counter underflow/zero!");
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
|
||||
RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() !=
|
||||
DYNTICK_IRQ_NONIDLE,
|
||||
"Bad RCU dynticks_nmi_nesting counter\n");
|
||||
RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
|
||||
@ -837,94 +603,7 @@ void rcu_irq_exit_check_preempt(void)
|
||||
}
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU */
|
||||
|
||||
/*
|
||||
* Wrapper for rcu_irq_exit() where interrupts are enabled.
|
||||
*
|
||||
* If you add or remove a call to rcu_irq_exit_irqson(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_irq_exit_irqson(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
rcu_irq_exit();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Exit an RCU extended quiescent state, which can be either the
|
||||
* idle loop or adaptive-tickless usermode execution.
|
||||
*
|
||||
* We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
|
||||
* allow for the possibility of usermode upcalls messing up our count of
|
||||
* interrupt nesting level during the busy period that is just now starting.
|
||||
*/
|
||||
static void noinstr rcu_eqs_exit(bool user)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
long oldval;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
oldval = rdp->dynticks_nesting;
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
|
||||
if (oldval) {
|
||||
// RCU was already watching, so just do accounting and leave.
|
||||
rdp->dynticks_nesting++;
|
||||
return;
|
||||
}
|
||||
rcu_dynticks_task_exit();
|
||||
// RCU is not watching here ...
|
||||
rcu_dynticks_eqs_exit();
|
||||
// ... but is watching here.
|
||||
instrumentation_begin();
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
|
||||
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
|
||||
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
WRITE_ONCE(rdp->dynticks_nesting, 1);
|
||||
WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_idle_exit - inform RCU that current CPU is leaving idle
|
||||
*
|
||||
* Exit idle mode, in other words, -enter- the mode in which RCU
|
||||
* read-side critical sections can occur.
|
||||
*
|
||||
* If you add or remove a call to rcu_idle_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_idle_exit(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
rcu_eqs_exit(false);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_idle_exit);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
/**
|
||||
* rcu_user_exit - inform RCU that we are exiting userspace.
|
||||
*
|
||||
* Exit RCU idle mode while entering the kernel because it can
|
||||
* run a RCU read side critical section anytime.
|
||||
*
|
||||
* If you add or remove a call to rcu_user_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void noinstr rcu_user_exit(void)
|
||||
{
|
||||
rcu_eqs_exit(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
|
||||
*
|
||||
@ -987,109 +666,6 @@ void __rcu_irq_enter_check_tick(void)
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ_FULL */
|
||||
|
||||
/**
|
||||
* rcu_nmi_enter - inform RCU of entry to NMI context
|
||||
*
|
||||
* If the CPU was idle from RCU's viewpoint, update rdp->dynticks and
|
||||
* rdp->dynticks_nmi_nesting to let the RCU grace-period handling know
|
||||
* that the CPU is active. This implementation permits nested NMIs, as
|
||||
* long as the nesting level does not overflow an int. (You will probably
|
||||
* run out of stack space first.)
|
||||
*
|
||||
* If you add or remove a call to rcu_nmi_enter(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
noinstr void rcu_nmi_enter(void)
|
||||
{
|
||||
/*
 * Amount added to ->dynticks_nmi_nesting at the end of this function:
 * 2 by default, lowered to 1 below when entering from an RCU-idle state.
 */
long incby = 2;
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
/* Complain about underflow. */
|
||||
WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0);
|
||||
|
||||
/*
|
||||
* If idle from RCU viewpoint, atomically increment ->dynticks
|
||||
* to mark non-idle and increment ->dynticks_nmi_nesting by one.
|
||||
* Otherwise, increment ->dynticks_nmi_nesting by two. This means
|
||||
* if ->dynticks_nmi_nesting is equal to one, we are guaranteed
|
||||
* to be in the outermost NMI handler that interrupted an RCU-idle
|
||||
* period (observation due to Andy Lutomirski).
|
||||
*/
|
||||
if (rcu_dynticks_curr_cpu_in_eqs()) {
|
||||
|
||||
/* The task-exit hook is run only when not already in NMI context. */
if (!in_nmi())
|
||||
rcu_dynticks_task_exit();
|
||||
|
||||
// RCU is not watching here ...
|
||||
rcu_dynticks_eqs_exit();
|
||||
// ... but is watching here.
|
||||
|
||||
/*
 * Open the instrumentable region only after the EQS exit above;
 * it is closed by the instrumentation_end() that follows the
 * tracepoint below.
 */
instrumentation_begin();
|
||||
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
|
||||
instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
|
||||
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
|
||||
incby = 1;
|
||||
} else if (!in_nmi()) {
|
||||
/*
 * Not RCU-idle and not in NMI context: call the tick-check hook.
 * NOTE(review): presumably this turns the scheduler tick back on
 * when RCU needs it -- confirm in rcu_irq_enter_check_tick().
 */
instrumentation_begin();
|
||||
rcu_irq_enter_check_tick();
|
||||
} else {
|
||||
/*
 * Not RCU-idle and already in NMI context: no extra work, but
 * still open the instrumentable region so that the
 * instrumentation_end() below is balanced on every path.
 */
instrumentation_begin();
|
||||
}
|
||||
|
||||
/*
 * Trace the old and new nesting values together with ->dynticks.
 * The event label is "Endirq" for the idle-exit case (incby == 1)
 * and "++=" otherwise.
 */
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
|
||||
rdp->dynticks_nmi_nesting,
|
||||
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
|
||||
rdp->dynticks_nmi_nesting + incby);
|
||||
/*
 * Compiler barrier after the nesting update.
 * NOTE(review): presumably keeps the compiler from moving the store
 * into the code that runs after this function returns -- confirm.
 */
barrier();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
|
||||
*
|
||||
* Enter an interrupt handler, which might possibly result in exiting
|
||||
* idle mode, in other words, entering the mode in which read-side critical
|
||||
* sections can occur. The caller must have disabled interrupts.
|
||||
*
|
||||
* Note that the Linux kernel is fully capable of entering an interrupt
|
||||
* handler that it never exits, for example when doing upcalls to user mode!
|
||||
* This code assumes that the idle loop never does upcalls to user mode.
|
||||
* If your architecture's idle loop does do upcalls to user mode (or does
|
||||
* anything else that results in unbalanced calls to the irq_enter() and
|
||||
* irq_exit() functions), RCU will give you what you deserve, good and hard.
|
||||
* But very infrequently and irreproducibly.
|
||||
*
|
||||
* Use things like work queues to work around this limitation.
|
||||
*
|
||||
* You have been warned.
|
||||
*
|
||||
* If you add or remove a call to rcu_irq_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
noinstr void rcu_irq_enter(void)
|
||||
{
|
||||
/* Check the documented precondition: the caller has interrupts disabled. */
lockdep_assert_irqs_disabled();
|
||||
/* Interrupt entry reuses the NMI-entry path for all nesting bookkeeping. */
rcu_nmi_enter();
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper for rcu_irq_enter() where interrupts are enabled.
|
||||
*
|
||||
* If you add or remove a call to rcu_irq_enter_irqson(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_irq_enter_irqson(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/*
 * rcu_irq_enter() asserts that interrupts are disabled, so disable
 * them around the call and restore the caller's interrupt state
 * afterwards.
 */
local_irq_save(flags);
|
||||
rcu_irq_enter();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future non-offloaded RCU-related work will need
|
||||
* to be done by the current CPU, even if none need be done immediately,
|
||||
@ -1227,7 +803,7 @@ static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
*/
|
||||
static int dyntick_save_progress_counter(struct rcu_data *rdp)
|
||||
{
|
||||
rdp->dynticks_snap = rcu_dynticks_snap(rdp);
|
||||
rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu);
|
||||
if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
|
||||
trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
|
||||
rcu_gpnum_ovf(rdp->mynode, rdp);
|
||||
@ -4328,13 +3904,14 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
|
||||
static void __init
|
||||
rcu_boot_init_percpu_data(int cpu)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
|
||||
/* Set up local state, ensuring consistent view of global state. */
|
||||
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
|
||||
INIT_WORK(&rdp->strict_work, strict_work_handler);
|
||||
WARN_ON_ONCE(rdp->dynticks_nesting != 1);
|
||||
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
|
||||
WARN_ON_ONCE(ct->dynticks_nesting != 1);
|
||||
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)));
|
||||
rdp->barrier_seq_snap = rcu_state.barrier_sequence;
|
||||
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
|
||||
rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
|
||||
@ -4358,6 +3935,7 @@ rcu_boot_init_percpu_data(int cpu)
|
||||
int rcutree_prepare_cpu(unsigned int cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
struct rcu_node *rnp = rcu_get_root();
|
||||
|
||||
@ -4366,7 +3944,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
|
||||
rdp->qlen_last_fqs_check = 0;
|
||||
rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
|
||||
rdp->blimit = blimit;
|
||||
rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */
|
||||
ct->dynticks_nesting = 1; /* CPU not up, no tearing. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
|
||||
/*
|
||||
|
@ -191,9 +191,6 @@ struct rcu_data {
|
||||
|
||||
/* 3) dynticks interface. */
|
||||
int dynticks_snap; /* Per-GP tracking for dynticks. */
|
||||
long dynticks_nesting; /* Track process nesting level. */
|
||||
long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */
|
||||
atomic_t dynticks; /* Even value for idle, else odd. */
|
||||
bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */
|
||||
bool rcu_urgent_qs; /* GP old need light quiescent state. */
|
||||
bool rcu_forced_tick; /* Forced tick to provide QS. */
|
||||
@ -438,7 +435,6 @@ static void rcu_cpu_kthread_setup(unsigned int cpu);
|
||||
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
|
||||
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
|
||||
static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
|
||||
static void rcu_preempt_deferred_qs(struct task_struct *t);
|
||||
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
|
||||
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
|
||||
@ -478,10 +474,6 @@ do { \
|
||||
|
||||
static void rcu_bind_gp_kthread(void);
|
||||
static bool rcu_nohz_full_cpu(void);
|
||||
static void rcu_dynticks_task_enter(void);
|
||||
static void rcu_dynticks_task_exit(void);
|
||||
static void rcu_dynticks_task_trace_enter(void);
|
||||
static void rcu_dynticks_task_trace_exit(void);
|
||||
|
||||
/* Forward declarations for tree_stall.h */
|
||||
static void record_gp_stall_check_time(void);
|
||||
|
@ -358,7 +358,7 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
|
||||
!(rnp->qsmaskinitnext & mask)) {
|
||||
mask_ofl_test |= mask;
|
||||
} else {
|
||||
snap = rcu_dynticks_snap(rdp);
|
||||
snap = rcu_dynticks_snap(cpu);
|
||||
if (rcu_dynticks_in_eqs(snap))
|
||||
mask_ofl_test |= mask;
|
||||
else
|
||||
|
@ -595,7 +595,7 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
* evaluate safety in terms of interrupt, softirq, and preemption
|
||||
* disabling.
|
||||
*/
|
||||
static notrace void rcu_preempt_deferred_qs(struct task_struct *t)
|
||||
notrace void rcu_preempt_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@ -935,7 +935,7 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
// period for a quiescent state from this CPU. Note that requests from
|
||||
// tasks are handled when removing the task from the blocked-tasks list
|
||||
// below.
|
||||
static notrace void rcu_preempt_deferred_qs(struct task_struct *t)
|
||||
notrace void rcu_preempt_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
@ -1296,37 +1296,3 @@ static void rcu_bind_gp_kthread(void)
|
||||
return;
|
||||
housekeeping_affine(current, HK_TYPE_RCU);
|
||||
}
|
||||
|
||||
/*
 * On dyntick-idle entry, record the current CPU number in the current
 * task's ->rcu_tasks_idle_cpu field.  The body compiles to nothing unless
 * both CONFIG_TASKS_RCU and CONFIG_NO_HZ_FULL are defined.
 */
|
||||
static __always_inline void rcu_dynticks_task_enter(void)
|
||||
{
|
||||
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
|
||||
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
|
||||
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
|
||||
}
|
||||
|
||||
/*
 * On dyntick-idle exit, reset the current task's ->rcu_tasks_idle_cpu
 * field to -1, undoing the store made by rcu_dynticks_task_enter().
 * The body compiles to nothing unless both CONFIG_TASKS_RCU and
 * CONFIG_NO_HZ_FULL are defined.
 */
|
||||
static __always_inline void rcu_dynticks_task_exit(void)
|
||||
{
|
||||
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
|
||||
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
|
||||
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
|
||||
}
|
||||
|
||||
/*
 * Turn on heavyweight RCU tasks trace readers on idle/user entry by
 * setting the current task's ->trc_reader_special.b.need_mb flag.
 * The flag is touched only when CONFIG_TASKS_TRACE_RCU is defined and
 * CONFIG_TASKS_TRACE_RCU_READ_MB is enabled; otherwise this is a no-op.
 */
|
||||
static __always_inline void rcu_dynticks_task_trace_enter(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
|
||||
current->trc_reader_special.b.need_mb = true;
|
||||
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
}
|
||||
|
||||
/*
 * Turn off heavyweight RCU tasks trace readers on idle/user exit by
 * clearing the current task's ->trc_reader_special.b.need_mb flag.
 * The flag is touched only when CONFIG_TASKS_TRACE_RCU is defined and
 * CONFIG_TASKS_TRACE_RCU_READ_MB is enabled; otherwise this is a no-op.
 */
|
||||
static __always_inline void rcu_dynticks_task_trace_exit(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
|
||||
current->trc_reader_special.b.need_mb = false;
|
||||
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
}
|
||||
|
@ -409,7 +409,19 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
|
||||
|
||||
static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
|
||||
{
|
||||
unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
|
||||
int cpu;
|
||||
struct task_struct *rcuc;
|
||||
unsigned long j;
|
||||
|
||||
rcuc = rdp->rcu_cpu_kthread_task;
|
||||
if (!rcuc)
|
||||
return false;
|
||||
|
||||
cpu = task_cpu(rcuc);
|
||||
if (cpu_is_offline(cpu) || idle_cpu(cpu))
|
||||
return false;
|
||||
|
||||
j = jiffies - READ_ONCE(rdp->rcuc_activity);
|
||||
|
||||
if (jp)
|
||||
*jp = j;
|
||||
@ -434,6 +446,9 @@ static void print_cpu_stall_info(int cpu)
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
char *ticks_title;
|
||||
unsigned long ticks_value;
|
||||
bool rcuc_starved;
|
||||
unsigned long j;
|
||||
char buf[32];
|
||||
|
||||
/*
|
||||
* We could be printing a lot while holding a spinlock. Avoid
|
||||
@ -450,8 +465,11 @@ static void print_cpu_stall_info(int cpu)
|
||||
}
|
||||
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
|
||||
falsepositive = rcu_is_gp_kthread_starving(NULL) &&
|
||||
rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
|
||||
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
|
||||
rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
|
||||
rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j);
|
||||
if (rcuc_starved)
|
||||
sprintf(buf, " rcuc=%ld jiffies(starved)", j);
|
||||
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%04x/%ld/%#lx softirq=%u/%u fqs=%ld%s%s\n",
|
||||
cpu,
|
||||
"O."[!!cpu_online(cpu)],
|
||||
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
|
||||
@ -460,36 +478,14 @@ static void print_cpu_stall_info(int cpu)
|
||||
rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
|
||||
"!."[!delta],
|
||||
ticks_value, ticks_title,
|
||||
rcu_dynticks_snap(rdp) & 0xfff,
|
||||
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
|
||||
rcu_dynticks_snap(cpu) & 0xffff,
|
||||
ct_dynticks_nesting_cpu(cpu), ct_dynticks_nmi_nesting_cpu(cpu),
|
||||
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
|
||||
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
|
||||
rcuc_starved ? buf : "",
|
||||
falsepositive ? " (false positive?)" : "");
|
||||
}
|
||||
|
||||
static void rcuc_kthread_dump(struct rcu_data *rdp)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long j;
|
||||
struct task_struct *rcuc;
|
||||
|
||||
rcuc = rdp->rcu_cpu_kthread_task;
|
||||
if (!rcuc)
|
||||
return;
|
||||
|
||||
cpu = task_cpu(rcuc);
|
||||
if (cpu_is_offline(cpu) || idle_cpu(cpu))
|
||||
return;
|
||||
|
||||
if (!rcu_is_rcuc_kthread_starving(rdp, &j))
|
||||
return;
|
||||
|
||||
pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
|
||||
sched_show_task(rcuc);
|
||||
if (!trigger_single_cpu_backtrace(cpu))
|
||||
dump_cpu_task(cpu);
|
||||
}
|
||||
|
||||
/* Complain about starvation of grace-period kthread. */
|
||||
static void rcu_check_gp_kthread_starvation(void)
|
||||
{
|
||||
@ -662,9 +658,6 @@ static void print_cpu_stall(unsigned long gps)
|
||||
rcu_check_gp_kthread_expired_fqs_timer();
|
||||
rcu_check_gp_kthread_starvation();
|
||||
|
||||
if (!use_softirq)
|
||||
rcuc_kthread_dump(rdp);
|
||||
|
||||
rcu_dump_cpu_stacks();
|
||||
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
|
@ -85,7 +85,7 @@ module_param(rcu_normal_after_boot, int, 0444);
|
||||
* and while lockdep is disabled.
|
||||
*
|
||||
* Note that if the CPU is in the idle loop from an RCU point of view (ie:
|
||||
* that we are in the section between rcu_idle_enter() and rcu_idle_exit())
|
||||
* that we are in the section between ct_idle_enter() and ct_idle_exit())
|
||||
* then rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an
|
||||
* rcu_read_lock(). The reason for this is that RCU ignores CPUs that are
|
||||
* in such a section, considering these as in extended quiescent state,
|
||||
|
@ -6591,7 +6591,7 @@ void __sched schedule_idle(void)
|
||||
} while (need_resched());
|
||||
}
|
||||
|
||||
#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK)
|
||||
#if defined(CONFIG_CONTEXT_TRACKING_USER) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK)
|
||||
asmlinkage __visible void __sched schedule_user(void)
|
||||
{
|
||||
/*
|
||||
|
@ -53,14 +53,14 @@ static noinline int __cpuidle cpu_idle_poll(void)
|
||||
{
|
||||
trace_cpu_idle(0, smp_processor_id());
|
||||
stop_critical_timings();
|
||||
rcu_idle_enter();
|
||||
ct_idle_enter();
|
||||
local_irq_enable();
|
||||
|
||||
while (!tif_need_resched() &&
|
||||
(cpu_idle_force_poll || tick_check_broadcast_expired()))
|
||||
cpu_relax();
|
||||
|
||||
rcu_idle_exit();
|
||||
ct_idle_exit();
|
||||
start_critical_timings();
|
||||
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
|
||||
|
||||
@ -98,12 +98,12 @@ void __cpuidle default_idle_call(void)
|
||||
*
|
||||
* Trace IRQs enable here, then switch off RCU, and have
|
||||
* arch_cpu_idle() use raw_local_irq_enable(). Note that
|
||||
* rcu_idle_enter() relies on lockdep IRQ state, so switch that
|
||||
* ct_idle_enter() relies on lockdep IRQ state, so switch that
|
||||
* last -- this is very similar to the entry code.
|
||||
*/
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare();
|
||||
rcu_idle_enter();
|
||||
ct_idle_enter();
|
||||
lockdep_hardirqs_on(_THIS_IP_);
|
||||
|
||||
arch_cpu_idle();
|
||||
@ -116,7 +116,7 @@ void __cpuidle default_idle_call(void)
|
||||
*/
|
||||
raw_local_irq_disable();
|
||||
lockdep_hardirqs_off(_THIS_IP_);
|
||||
rcu_idle_exit();
|
||||
ct_idle_exit();
|
||||
lockdep_hardirqs_on(_THIS_IP_);
|
||||
raw_local_irq_enable();
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <linux/capability.h>
|
||||
#include <linux/cgroup_api.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/cpumask_api.h>
|
||||
#include <linux/ctype.h>
|
||||
|
@ -620,7 +620,7 @@ void irq_enter_rcu(void)
|
||||
*/
|
||||
void irq_enter(void)
|
||||
{
|
||||
rcu_irq_enter();
|
||||
ct_irq_enter();
|
||||
irq_enter_rcu();
|
||||
}
|
||||
|
||||
@ -672,7 +672,7 @@ void irq_exit_rcu(void)
|
||||
void irq_exit(void)
|
||||
{
|
||||
__irq_exit_rcu();
|
||||
rcu_irq_exit();
|
||||
ct_irq_exit();
|
||||
/* must be last! */
|
||||
lockdep_hardirq_exit();
|
||||
}
|
||||
|
@ -73,6 +73,15 @@ config TIME_KUNIT_TEST
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CONTEXT_TRACKING
|
||||
bool
|
||||
|
||||
config CONTEXT_TRACKING_IDLE
|
||||
bool
|
||||
select CONTEXT_TRACKING
|
||||
help
|
||||
Tracks idle state on behalf of RCU.
|
||||
|
||||
if GENERIC_CLOCKEVENTS
|
||||
menu "Timers subsystem"
|
||||
|
||||
@ -111,7 +120,7 @@ config NO_HZ_FULL
|
||||
# NO_HZ_COMMON dependency
|
||||
# We need at least one periodic CPU for timekeeping
|
||||
depends on SMP
|
||||
depends on HAVE_CONTEXT_TRACKING
|
||||
depends on HAVE_CONTEXT_TRACKING_USER
|
||||
# VIRT_CPU_ACCOUNTING_GEN dependency
|
||||
depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
|
||||
select NO_HZ_COMMON
|
||||
@ -137,31 +146,37 @@ config NO_HZ_FULL
|
||||
|
||||
endchoice
|
||||
|
||||
config CONTEXT_TRACKING
|
||||
bool
|
||||
config CONTEXT_TRACKING_USER
|
||||
bool
|
||||
depends on HAVE_CONTEXT_TRACKING_USER
|
||||
select CONTEXT_TRACKING
|
||||
help
|
||||
Track transitions between kernel and user on behalf of RCU and
|
||||
tickless cputime accounting. The former case relies on context
|
||||
tracking to enter/exit RCU extended quiescent states.
|
||||
|
||||
config CONTEXT_TRACKING_FORCE
|
||||
bool "Force context tracking"
|
||||
depends on CONTEXT_TRACKING
|
||||
config CONTEXT_TRACKING_USER_FORCE
|
||||
bool "Force user context tracking"
|
||||
depends on CONTEXT_TRACKING_USER
|
||||
default y if !NO_HZ_FULL
|
||||
help
|
||||
The major pre-requirement for full dynticks to work is to
|
||||
support the context tracking subsystem. But there are also
|
||||
support the user context tracking subsystem. But there are also
|
||||
other dependencies to provide in order to make full
|
||||
dynticks work.
|
||||
|
||||
This option stands for testing when an arch implements the
|
||||
context tracking backend but doesn't yet fulfill all the
|
||||
user context tracking backend but doesn't yet fulfill all the
|
||||
requirements to make the full dynticks feature work.
|
||||
Without the full dynticks, there is no way to test the support
|
||||
for context tracking and the subsystems that rely on it: RCU
|
||||
for user context tracking and the subsystems that rely on it: RCU
|
||||
userspace extended quiescent state and tickless cputime
|
||||
accounting. This option copes with the absence of the full
|
||||
dynticks subsystem by forcing the context tracking on all
|
||||
dynticks subsystem by forcing the user context tracking on all
|
||||
CPUs in the system.
|
||||
|
||||
Say Y only if you're working on the development of an
|
||||
architecture backend for the context tracking.
|
||||
architecture backend for the user context tracking.
|
||||
|
||||
Say N otherwise, this option brings an overhead that you
|
||||
don't want in production.
|
||||
|
@ -571,7 +571,7 @@ void __init tick_nohz_init(void)
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, tick_nohz_full_mask)
|
||||
context_tracking_cpu_set(cpu);
|
||||
ct_cpu_track_user(cpu);
|
||||
|
||||
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
|
||||
"kernel/nohz:predown", NULL,
|
||||
|
@ -3105,17 +3105,17 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
|
||||
}
|
||||
|
||||
/*
|
||||
* When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
|
||||
* When an NMI triggers, RCU is enabled via ct_nmi_enter(),
|
||||
* but if the above rcu_is_watching() failed, then the NMI
|
||||
* triggered someplace critical, and rcu_irq_enter() should
|
||||
* triggered someplace critical, and ct_irq_enter() should
|
||||
* not be called from NMI.
|
||||
*/
|
||||
if (unlikely(in_nmi()))
|
||||
return;
|
||||
|
||||
rcu_irq_enter_irqson();
|
||||
ct_irq_enter_irqson();
|
||||
__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
|
||||
rcu_irq_exit_irqson();
|
||||
ct_irq_exit_irqson();
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user