Merge branches 'doc.2021.11.30c', 'exp.2021.12.07a', 'fastnohz.2021.11.30c', 'fixes.2021.11.30c', 'nocb.2021.12.09a', 'nolibc.2021.11.30c', 'tasks.2021.12.09a', 'torture.2021.12.07a' and 'torturescript.2021.11.30c' into HEAD
doc.2021.11.30c: Documentation updates.
exp.2021.12.07a: Expedited-grace-period fixes.
fastnohz.2021.11.30c: Remove CONFIG_RCU_FAST_NO_HZ.
fixes.2021.11.30c: Miscellaneous fixes.
nocb.2021.12.09a: No-CB CPU updates.
nolibc.2021.11.30c: Tiny in-kernel library updates.
tasks.2021.12.09a: RCU-tasks updates, including update-side scalability.
torture.2021.12.07a: Torture-test in-kernel module updates.
torturescript.2021.11.30c: Torture-test scripting updates.
@@ -16,7 +16,7 @@
 static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
 {
 	/*
-	 * In order to read the offloaded state of an rdp is a safe
+	 * In order to read the offloaded state of an rdp in a safe
 	 * and stable way and prevent from its value to be changed
 	 * under us, we must either hold the barrier mutex, the cpu
 	 * hotplug lock (read or write) or the nocb lock. Local
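The locking rules spelled out in the comment above are enforced at runtime by a lockdep assertion in this same function. The following is a simplified sketch of that check, not a verbatim quote; the helper names (rcu_lockdep_is_held_nocb(), lockdep_is_cpus_held()) and the exact condition set are assumptions to be checked against kernel/rcu/tree_plugin.h:

	static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
	{
		/* Complain unless one of the comment's conditions holds:
		 * barrier mutex, CPU-hotplug lock, nocb lock, or a local
		 * non-preemptible read of this CPU's own rdp. */
		RCU_LOCKDEP_WARN(
			!(lockdep_is_held(&rcu_state.barrier_mutex) ||
			  (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
			  rcu_lockdep_is_held_nocb(rdp) ||
			  (rdp == this_cpu_ptr(&rcu_data) &&
			   !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()))),
			"Unsafe read of RCU_NOCB offloaded state");

		return rcu_segcblist_is_offloaded(&rdp->cblist);
	}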
@@ -51,12 +51,10 @@ static void __init rcu_bootup_announce_oddness(void)
 			RCU_FANOUT);
 	if (rcu_fanout_exact)
 		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
-	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
-		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
 	if (IS_ENABLED(CONFIG_PROVE_RCU))
 		pr_info("\tRCU lockdep checking is enabled.\n");
 	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
-		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
+		pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
 	if (RCU_NUM_LVLS >= 4)
 		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
 	if (RCU_FANOUT_LEAF != 16)
@@ -88,13 +86,13 @@ static void __init rcu_bootup_announce_oddness(void)
 	if (rcu_kick_kthreads)
 		pr_info("\tKick kthreads if too-long grace period.\n");
 	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
-		pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
+		pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
 	if (gp_preinit_delay)
 		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
 	if (gp_init_delay)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
 	if (gp_cleanup_delay)
-		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
 	if (!use_softirq)
 		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
@@ -260,10 +258,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 	 * no need to check for a subsequent expedited GP.  (Though we are
 	 * still in a quiescent state in any case.)
 	 */
-	if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
+	if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
 		rcu_report_exp_rdp(rdp);
 	else
-		WARN_ON_ONCE(rdp->exp_deferred_qs);
+		WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
 }
 
 /*
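This hunk, and the similar ones below, rename rdp->exp_deferred_qs to rdp->cpu_no_qs.b.exp, folding the deferred expedited-quiescent-state flag into the existing cpu_no_qs word. For reference, the cpu_no_qs field of struct rcu_data is a union along these lines (a sketch of the definition in kernel/rcu/tree.h; the comments are editorial):

	union rcu_noqs {
		struct {
			u8 norm;	/* Normal GP still needs a QS from this CPU. */
			u8 exp;		/* Expedited GP still needs a QS. */
		} b;			/* Individual bits. */
		u16 s;			/* Both bits as one value, for combined checks. */
	};

Packing both flags into one word lets a single read of .s test "any quiescent state still needed", which is why the preemptible rcu_qs() in the next hunk switches from the combined .s test to the precise .b.norm bit it actually means.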
@@ -277,12 +275,16 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
  * current task, there might be any number of other tasks blocked while
  * in an RCU read-side critical section.
  *
+ * Unlike non-preemptible-RCU, quiescent state reports for expedited
+ * grace periods are handled separately via deferred quiescent states
+ * and context switch events.
+ *
  * Callers to this function must disable preemption.
  */
 static void rcu_qs(void)
 {
 	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
-	if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
+	if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
 				       __this_cpu_read(rcu_data.gp_seq),
 				       TPS("cpuqs"));
@@ -350,7 +352,7 @@ void rcu_note_context_switch(bool preempt)
 	 * means that we continue to block the current grace period.
 	 */
 	rcu_qs();
-	if (rdp->exp_deferred_qs)
+	if (rdp->cpu_no_qs.b.exp)
 		rcu_report_exp_rdp(rdp);
 	rcu_tasks_qs(current, preempt);
 	trace_rcu_utilization(TPS("End context switch"));
@@ -477,7 +479,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 	 */
 	special = t->rcu_read_unlock_special;
 	rdp = this_cpu_ptr(&rcu_data);
-	if (!special.s && !rdp->exp_deferred_qs) {
+	if (!special.s && !rdp->cpu_no_qs.b.exp) {
 		local_irq_restore(flags);
 		return;
 	}
@@ -497,7 +499,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 	 * tasks are handled when removing the task from the
 	 * blocked-tasks list below.
 	 */
-	if (rdp->exp_deferred_qs)
+	if (rdp->cpu_no_qs.b.exp)
 		rcu_report_exp_rdp(rdp);
 
 	/* Clean up if blocked during RCU read-side critical section. */
@@ -580,7 +582,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
  */
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
-	return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
+	return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) ||
 		READ_ONCE(t->rcu_read_unlock_special.s)) &&
 	       rcu_preempt_depth() == 0;
 }
@@ -642,7 +644,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		  (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
 		   t->rcu_blocked_node);
 	// Need to defer quiescent state until everything is enabled.
-	if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
+	if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) {
 		// Using softirq, safe to awaken, and either the
 		// wakeup is free or there is either an expedited
 		// GP in flight or a potential need to deboost.
@@ -845,10 +847,8 @@ static void rcu_qs(void)
 	trace_rcu_grace_period(TPS("rcu_sched"),
 			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
 	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
-	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
-		return;
-	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
-	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
+	if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
+		rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
 }
 
 /*
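This simplification relies on another change in this series: rcu_report_exp_rdp() now clears the deferred expedited flag itself, so its callers no longer clear cpu_no_qs.b.exp by hand before reporting. A sketch of the resulting helper shape (an assumption about kernel/rcu/tree_exp.h, not a quote from this diff):

	void rcu_report_exp_rdp(struct rcu_data *rdp)
	{
		/* The flag-clearing that callers used to do lives here now. */
		WRITE_ONCE(rdp->cpu_no_qs.b.exp, false);
		rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
	}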
@@ -925,7 +925,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
 	return false;
 }
-static void rcu_preempt_deferred_qs(struct task_struct *t) { }
+
+// Except that we do need to respond to a request by an expedited grace
+// period for a quiescent state from this CPU.  Note that requests from
+// tasks are handled when removing the task from the blocked-tasks list
+// below.
+static void rcu_preempt_deferred_qs(struct task_struct *t)
+{
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+	if (rdp->cpu_no_qs.b.exp)
+		rcu_report_exp_rdp(rdp);
+}
 
 /*
  * Because there is no preemptible RCU, there can be no readers blocked,
@@ -1153,7 +1164,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 /*
  * Create an RCU-boost kthread for the specified node if one does not
  * already exist.  We only create this kthread for preemptible RCU.
- * Returns zero if all is well, a negated errno otherwise.
  */
 static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 {
@@ -1204,8 +1214,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
 		    cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
+	cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
 	if (cpumask_weight(cm) == 0)
-		cpumask_setall(cm);
+		cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
 	set_cpus_allowed_ptr(t, cm);
 	free_cpumask_var(cm);
 }
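The hunk above changes what happens when the boost kthread would otherwise be left with no eligible CPUs: the mask is now first intersected with the housekeeping set, and if that intersection is empty, the kthread falls back to the housekeeping set as a whole rather than to cpumask_setall() (which could have placed it on any CPU, including nohz_full ones). A minimal userspace model of that decision, using plain bitmasks in place of the kernel cpumask API:

	#include <stdio.h>
	#include <stdint.h>

	/* desired: the node's CPUs minus the outgoing CPU;
	 * housekeeping: CPUs allowed to do RCU housekeeping work. */
	static uint64_t boost_affinity(uint64_t desired, uint64_t housekeeping)
	{
		uint64_t cm = desired & housekeeping;	/* cpumask_and() */
		if (cm == 0)				/* cpumask_weight(cm) == 0 */
			cm = housekeeping;		/* cpumask_copy() fallback */
		return cm;
	}

	int main(void)
	{
		/* CPUs 0-3 eligible, but only CPUs 4-7 are housekeeping:
		 * the kthread lands on CPUs 4-7 rather than on all CPUs. */
		printf("%#llx\n", (unsigned long long)boost_affinity(0x0fULL, 0xf0ULL));
		return 0;
	}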
@@ -1253,201 +1264,6 @@ static void __init rcu_spawn_boost_kthreads(void)
 
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
-#if !defined(CONFIG_RCU_FAST_NO_HZ)
-
-/*
- * Check to see if any future non-offloaded RCU-related work will need
- * to be done by the current CPU, even if none need be done immediately,
- * returning 1 if so.  This function is part of the RCU implementation;
- * it is -not- an exported member of the RCU API.
- *
- * Because we not have RCU_FAST_NO_HZ, just check whether or not this
- * CPU has RCU callbacks queued.
- */
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
-{
-	*nextevt = KTIME_MAX;
-	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
-	       !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
-}
-
-/*
- * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
- * after it.
- */
-static void rcu_cleanup_after_idle(void)
-{
-}
-
-/*
- * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
- * is nothing.
- */
-static void rcu_prepare_for_idle(void)
-{
-}
-
-#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-
-/*
- * This code is invoked when a CPU goes idle, at which point we want
- * to have the CPU do everything required for RCU so that it can enter
- * the energy-efficient dyntick-idle mode.
- *
- * The following preprocessor symbol controls this:
- *
- * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
- *	to sleep in dyntick-idle mode with RCU callbacks pending.  This
- *	is sized to be roughly one RCU grace period.  Those energy-efficiency
- *	benchmarkers who might otherwise be tempted to set this to a large
- *	number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
- *	system.  And if you are -that- concerned about energy efficiency,
- *	just power the system down and be done with it!
- *
- * The value below works well in practice.  If future workloads require
- * adjustment, they can be converted into kernel config parameters, though
- * making the state machine smarter might be a better option.
- */
-#define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
-
-static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
-module_param(rcu_idle_gp_delay, int, 0644);
-
-/*
- * Try to advance callbacks on the current CPU, but only if it has been
- * awhile since the last time we did so.  Afterwards, if there are any
- * callbacks ready for immediate invocation, return true.
- */
-static bool __maybe_unused rcu_try_advance_all_cbs(void)
-{
-	bool cbs_ready = false;
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-	struct rcu_node *rnp;
-
-	/* Exit early if we advanced recently. */
-	if (jiffies == rdp->last_advance_all)
-		return false;
-	rdp->last_advance_all = jiffies;
-
-	rnp = rdp->mynode;
-
-	/*
-	 * Don't bother checking unless a grace period has
-	 * completed since we last checked and there are
-	 * callbacks not yet ready to invoke.
-	 */
-	if ((rcu_seq_completed_gp(rdp->gp_seq,
-				  rcu_seq_current(&rnp->gp_seq)) ||
-	     unlikely(READ_ONCE(rdp->gpwrap))) &&
-	    rcu_segcblist_pend_cbs(&rdp->cblist))
-		note_gp_changes(rdp);
-
-	if (rcu_segcblist_ready_cbs(&rdp->cblist))
-		cbs_ready = true;
-	return cbs_ready;
-}
-
-/*
- * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
- * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
- * caller about what to set the timeout.
- *
- * The caller must have disabled interrupts.
- */
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
-{
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-	unsigned long dj;
-
-	lockdep_assert_irqs_disabled();
-
-	/* If no non-offloaded callbacks, RCU doesn't need the CPU. */
-	if (rcu_segcblist_empty(&rdp->cblist) ||
-	    rcu_rdp_is_offloaded(rdp)) {
-		*nextevt = KTIME_MAX;
-		return 0;
-	}
-
-	/* Attempt to advance callbacks. */
-	if (rcu_try_advance_all_cbs()) {
-		/* Some ready to invoke, so initiate later invocation. */
-		invoke_rcu_core();
-		return 1;
-	}
-	rdp->last_accelerate = jiffies;
-
-	/* Request timer and round. */
-	dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
-
-	*nextevt = basemono + dj * TICK_NSEC;
-	return 0;
-}
-
-/*
- * Prepare a CPU for idle from an RCU perspective.  The first major task is to
- * sense whether nohz mode has been enabled or disabled via sysfs.  The second
- * major task is to accelerate (that is, assign grace-period numbers to) any
- * recently arrived callbacks.
- *
- * The caller must have disabled interrupts.
- */
-static void rcu_prepare_for_idle(void)
-{
-	bool needwake;
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-	struct rcu_node *rnp;
-	int tne;
-
-	lockdep_assert_irqs_disabled();
-	if (rcu_rdp_is_offloaded(rdp))
-		return;
-
-	/* Handle nohz enablement switches conservatively. */
-	tne = READ_ONCE(tick_nohz_active);
-	if (tne != rdp->tick_nohz_enabled_snap) {
-		if (!rcu_segcblist_empty(&rdp->cblist))
-			invoke_rcu_core(); /* force nohz to see update. */
-		rdp->tick_nohz_enabled_snap = tne;
-		return;
-	}
-	if (!tne)
-		return;
-
-	/*
-	 * If we have not yet accelerated this jiffy, accelerate all
-	 * callbacks on this CPU.
-	 */
-	if (rdp->last_accelerate == jiffies)
-		return;
-	rdp->last_accelerate = jiffies;
-	if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
-		rnp = rdp->mynode;
-		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
-		needwake = rcu_accelerate_cbs(rnp, rdp);
-		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
-		if (needwake)
-			rcu_gp_kthread_wake();
-	}
-}
-
-/*
- * Clean up for exit from idle.  Attempt to advance callbacks based on
- * any grace periods that elapsed while the CPU was idle, and if any
- * callbacks are now ready to invoke, initiate invocation.
- */
-static void rcu_cleanup_after_idle(void)
-{
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-
-	lockdep_assert_irqs_disabled();
-	if (rcu_rdp_is_offloaded(rdp))
-		return;
-	if (rcu_try_advance_all_cbs())
-		invoke_rcu_core();
-}
-
-#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-
 /*
  * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
  * grace-period kthread will do force_quiescent_state() processing?
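Among the code removed above is rcu_needs_cpu()'s wakeup rounding, which batched the timers of idle CPUs onto common rcu_idle_gp_delay boundaries so that CPUs with pending callbacks would wake together. A small standalone illustration of that arithmetic (the values are illustrative, and ROUND_UP mimics the kernel's round_up() for this power-of-two case):

	#include <stdio.h>

	#define ROUND_UP(x, y)	((((x) + (y) - 1) / (y)) * (y))

	int main(void)
	{
		unsigned long rcu_idle_gp_delay = 4;	/* RCU_IDLE_GP_DELAY */
		unsigned long jiffies = 1000002;	/* illustrative snapshot */

		/* round_up(4 + 1000002, 4) = 1000008, so sleep 6 jiffies:
		 * every idle CPU lands on the same 4-jiffy boundary. */
		unsigned long dj = ROUND_UP(rcu_idle_gp_delay + jiffies,
					    rcu_idle_gp_delay) - jiffies;

		printf("sleep %lu jiffies\n", dj);	/* prints: sleep 6 jiffies */
		return 0;
	}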
@@ -1455,7 +1271,7 @@ static void rcu_cleanup_after_idle(void)
  * CPU unless the grace period has extended for too long.
  *
  * This code relies on the fact that all NO_HZ_FULL CPUs are also
- * CONFIG_RCU_NOCB_CPU CPUs.
+ * RCU_NOCB_CPU CPUs.
  */
 static bool rcu_nohz_full_cpu(void)
 {